{
  "step": 50000,
  "loss": 5.999711477773722,
  "config": {
    "base_model_name": "meta-llama/Llama-3.1-8B-Instruct",
    "masking_type": "bidirectional",
    "batch_size": 16,
    "learning_rate": 0.0003,
    "weight_decay": 0.01,
    "grad_clip": 1.0,
    "max_steps": 50000,
    "max_length": 128,
    "target_tokens": 100000000,
    "save_interval": 5000,
    "log_interval": 50
  },
  "model_type": "CustomTransformerModel",
  "attention_type": "bidirectional",
  "training_tokens": 102400000,
  "compatible_with_inference": true,
  "accelerator": "unsloth_kernel_optimized",
  "training_mode": "pure_diffusion",
  "save_method": "unsloth_optimized",
  "timestamp": "2025-06-11T06:22:08.448859"
}