AbstractPhil
/

ksimplex-llm-prototype

Model card Files Files and versions

ksimplex-llm-prototype / config.json

AbstractPhil's picture

Epoch 20 checkpoint

8f6ef1c verified 9 days ago

history blame contribute delete

571 Bytes

	{
	"model": {
	"vocab_size": 50257,
	"max_seq_len": 256,
	"embed_dim": 384,
	"depth": 4,
	"edim": 16,
	"feat_dim": 96,
	"hidden": 384,
	"num_heads": 8,
	"num_blocks": 8,
	"dropout": 0.1,
	"params": 54107168
	},
	"training": {
	"batch_size": 12,
	"seq_len": 256,
	"lr": 0.0003,
	"weight_decay": 0.1,
	"num_epochs": 14,
	"grad_clip": 1.0,
	"ce_weight": 1.0,
	"validity_weight": 0.1
	},
	"data": {
	"train_tokens": 304222,
	"val_tokens": 33803,
	"vocab_size": 50257
	},
	"run_name": "run_1770236129"
	}