{
  "step": 50000,
  "loss": 5.999711477773722,
  "config": {
    "base_model_name": "meta-llama/Llama-3.1-8B-Instruct",
    "masking_type": "bidirectional",
    "batch_size": 16,
    "learning_rate": 0.0003,
    "weight_decay": 0.01,
    "grad_clip": 1.0,
    "max_steps": 50000,
    "max_length": 128,
    "target_tokens": 100000000,
    "save_interval": 5000,
    "log_interval": 50
  },
  "model_type": "CustomTransformerModel",
  "attention_type": "bidirectional",
  "training_tokens": 102400000,
  "compatible_with_inference": true,
  "accelerator": "unsloth_kernel_optimized",
  "training_mode": "pure_diffusion",
  "save_method": "unsloth_optimized",
  "timestamp": "2025-06-11T06:22:08.448859"
}