{
"architectures": [
"Gemma3ForConditionalGeneration"
],
"boi_token_index": 255999,
"dtype": "bfloat16",
"eoi_token_index": 256000,
"eos_token_id": [
1,
106
],
"image_token_index": 262144,
"initializer_range": 0.02,
"mm_tokens_per_image": 256,
"model_type": "gemma3",
"text_config": {
"_sliding_window_pattern": 6,
"attention_bias": false,
"attention_dropout": 0.0,
"attn_logit_softcapping": null,
"bos_token_id": 2,
"eos_token_id": 1,
"final_logit_softcapping": null,
"head_dim": 256,
"hidden_activation": "gelu_pytorch_tanh",
"hidden_size": 16,
"initializer_range": 0.02,
"intermediate_size": 10240,
"layer_types": [
"sliding_attention",
"sliding_attention"
],
"max_position_embeddings": 131072,
"model_type": "gemma3_text",
"num_attention_heads": 4,
"num_hidden_layers": 2,
"num_key_value_heads": 2,
"pad_token_id": 0,
"query_pre_attn_scalar": 256,
"rms_norm_eps": 1e-06,
"rope_parameters": {
"full_attention": {
"factor": 8.0,
"rope_theta": 1000000.0,
"rope_type": "linear"
},
"rope_theta": null,
"rope_type": "default",
"sliding_attention": {
"rope_theta": 10000.0,
"rope_type": "default"
}
},
"sliding_window": 1024,
"tie_word_embeddings": true,
"use_bidirectional_attention": false,
"use_cache": true,
"vocab_size": 262208
},
"tie_word_embeddings": true,
"transformers_version": "5.7.0.dev0",
"vision_config": {
"attention_dropout": 0.0,
"embed_dim": 64,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 16,
"image_size": 224,
"intermediate_size": 4304,
"layer_norm_eps": 1e-06,
"model_type": "siglip_vision_model",
"num_attention_heads": 4,
"num_channels": 3,
"num_hidden_layers": 2,
"num_key_value_heads": 2,
"patch_size": 14,
"vision_use_head": false
}
}