{
  "step": 50000,
  "loss": 5.999711477773722,
  "config": {
    "base_model_name": "meta-llama/Llama-3.1-8B-Instruct",
    "masking_type": "bidirectional",
    "batch_size": 16,
    "learning_rate": 0.0003,
    "weight_decay": 0.01,
    "grad_clip": 1.0,
    "max_steps": 50000,
    "max_length": 128,
    "target_tokens": 100000000,
    "save_interval": 5000,
    "log_interval": 50
  },
  "model_type": "CustomTransformerModel",
  "attention_type": "bidirectional",
  "training_tokens": 102400000,
  "compatible_with_inference": true,
  "accelerator": "unsloth_kernel_optimized",
  "training_mode": "pure_diffusion",
  "save_method": "unsloth_optimized",
  "timestamp": "2025-06-11T06:22:08.448859"
}