File size: 1,749 Bytes
76ea727
 
 
 
bb007d0
46a5905
76ea727
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd7f219
76ea727
 
95623d5
76ea727
 
46a5905
 
 
 
95623d5
46a5905
95623d5
20e2c73
95623d5
46a5905
90c7543
46a5905
 
76ea727
 
 
46a5905
 
 
95623d5
46a5905
76ea727
 
8c1eae6
46a5905
76ea727
95623d5
 
20e2c73
76ea727
95623d5
 
46a5905
95623d5
76ea727
 
95623d5
8c1eae6
76ea727
46a5905
 
76ea727
 
46a5905
 
76ea727
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
{
  "architectures": [
    "LlavaNextForConditionalGeneration"
  ],
  "dtype": "bfloat16",
  "ignore_index": -100,
  "image_grid_pinpoints": [
    [
      336,
      672
    ],
    [
      672,
      336
    ],
    [
      672,
      672
    ],
    [
      1008,
      336
    ],
    [
      336,
      1008
    ]
  ],
  "image_seq_length": 576,
  "image_token_index": 32000,
  "model_type": "llava_next",
  "multimodal_projector_bias": true,
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
    "architectures": [
      "MistralForCausalLM"
    ],
    "attention_dropout": 0.0,
    "head_dim": null,
    "hidden_act": "silu",
    "hidden_size": 16,
    "initializer_range": 0.02,
    "intermediate_size": 14336,
    "layer_types": null,
    "max_position_embeddings": 32768,
    "model_type": "mistral",
    "num_attention_heads": 4,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "rms_norm_eps": 1e-05,
    "rope_theta": 1000000.0,
    "sliding_window": null,
    "use_cache": true,
    "vocab_size": 32064
  },
  "tie_word_embeddings": false,
  "transformers_version": "4.56.2",
  "use_image_newline_parameter": true,
  "vision_config": {
    "attention_dropout": 0.0,
    "hidden_act": "quick_gelu",
    "hidden_size": 16,
    "image_size": 336,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "layer_norm_eps": 1e-05,
    "model_type": "clip_vision_model",
    "num_attention_heads": 4,
    "num_channels": 3,
    "num_hidden_layers": 3,
    "patch_size": 14,
    "projection_dim": 768,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default",
  "vocab_size": 32064
}