{
"architectures": [
"MambaInteger50M"
],
"d_model": 512,
"n_layer": 16,
"vocab_size": 16384,
"ssm_cfg": {
"d_state": 64,
"dt_rank": 32,
"dt_min": 0.001,
"dt_max": 0.1,
"use_dyadic_scan": true,
"scale_bits": 15,
"n_heads": 16,
"d_head": 32,
"use_ssd": true,
"chunk_size": 64
},
"training": {
"seq_len": 1024,
"batch_size": 64,
"gradient_accumulation_steps": 2,
"total_steps": 200000,
"learning_rate": 6e-4,
"decay_lr": 6e-4,
"weight_decay": 0.05,
"grad_clip": 50.0,
"num_workers": 0,
"log_interval": 500,
"checkpoint_interval": 10000,
"use_amp": false,
"use_compile": false
},
"rms_norm_eps": 1e-05,
"use_bitshift_norm": true,
"weight_quantization": "bitnet_1.58",
"model_type": "mamba_integer",
"model_name": "mamba_integer_50m"
}