{ "step": 50000, "loss": 5.999711477773722, "config": { "base_model_name": "meta-llama/Llama-3.1-8B-Instruct", "masking_type": "bidirectional", "batch_size": 16, "learning_rate": 0.0003, "weight_decay": 0.01, "grad_clip": 1.0, "max_steps": 50000, "max_length": 128, "target_tokens": 100000000, "save_interval": 5000, "log_interval": 50 }, "model_type": "CustomTransformerModel", "attention_type": "bidirectional", "training_tokens": 102400000, "compatible_with_inference": true, "accelerator": "unsloth_kernel_optimized", "training_mode": "pure_diffusion", "save_method": "unsloth_optimized", "timestamp": "2025-06-11T06:22:08.448859" }