ksimplex-llm-prototype / config.json
AbstractPhil's picture
Epoch 20 checkpoint
8f6ef1c verified
{
"model": {
"vocab_size": 50257,
"max_seq_len": 256,
"embed_dim": 384,
"depth": 4,
"edim": 16,
"feat_dim": 96,
"hidden": 384,
"num_heads": 8,
"num_blocks": 8,
"dropout": 0.1,
"params": 54107168
},
"training": {
"batch_size": 12,
"seq_len": 256,
"lr": 0.0003,
"weight_decay": 0.1,
"num_epochs": 14,
"grad_clip": 1.0,
"ce_weight": 1.0,
"validity_weight": 0.1
},
"data": {
"train_tokens": 304222,
"val_tokens": 33803,
"vocab_size": 50257
},
"run_name": "run_1770236129"
}