{ "architectures": [ "GPT" ], "bias": false, "block_size": 2048, "dropout": 0.0, "intermediate_size": 10136, "kv_lora_rank": 512, "model_type": "gpt2", "n_embd": 3072, "n_head": 24, "n_layer": 24, "q_lora_rank": 1024, "qk_nope_head_dim": 128, "qk_rope_head_dim": 64, "torch_dtype": "float32", "transformers_version": "4.51.3", "vocab_size": 50304 }