Upload Gemma3ForConditionalGeneration

#8
by albertvillanova HF Staff - opened
Files changed (3) hide show
  1. config.json +19 -7
  2. generation_config.json +5 -5
  3. model.safetensors +2 -2
config.json CHANGED
@@ -18,6 +18,8 @@
18
  "attention_bias": false,
19
  "attention_dropout": 0.0,
20
  "attn_logit_softcapping": null,
 
 
21
  "final_logit_softcapping": null,
22
  "head_dim": 256,
23
  "hidden_activation": "gelu_pytorch_tanh",
@@ -33,26 +35,36 @@
33
  "num_attention_heads": 4,
34
  "num_hidden_layers": 2,
35
  "num_key_value_heads": 2,
 
36
  "query_pre_attn_scalar": 256,
37
  "rms_norm_eps": 1e-06,
38
- "rope_local_base_freq": 10000.0,
39
- "rope_scaling": {
40
- "factor": 8.0,
41
- "rope_type": "linear"
 
 
 
 
 
 
 
 
42
  },
43
- "rope_theta": 1000000.0,
44
  "sliding_window": 1024,
 
45
  "use_bidirectional_attention": false,
46
  "use_cache": true,
47
  "vocab_size": 262208
48
  },
49
- "transformers_version": "4.57.3",
 
50
  "vision_config": {
51
  "attention_dropout": 0.0,
52
  "embed_dim": 64,
53
  "hidden_act": "gelu_pytorch_tanh",
54
  "hidden_size": 16,
55
- "image_size": 896,
56
  "intermediate_size": 4304,
57
  "layer_norm_eps": 1e-06,
58
  "model_type": "siglip_vision_model",
 
18
  "attention_bias": false,
19
  "attention_dropout": 0.0,
20
  "attn_logit_softcapping": null,
21
+ "bos_token_id": 2,
22
+ "eos_token_id": 1,
23
  "final_logit_softcapping": null,
24
  "head_dim": 256,
25
  "hidden_activation": "gelu_pytorch_tanh",
 
35
  "num_attention_heads": 4,
36
  "num_hidden_layers": 2,
37
  "num_key_value_heads": 2,
38
+ "pad_token_id": 0,
39
  "query_pre_attn_scalar": 256,
40
  "rms_norm_eps": 1e-06,
41
+ "rope_parameters": {
42
+ "full_attention": {
43
+ "factor": 8.0,
44
+ "rope_theta": 1000000.0,
45
+ "rope_type": "linear"
46
+ },
47
+ "rope_theta": null,
48
+ "rope_type": "default",
49
+ "sliding_attention": {
50
+ "rope_theta": 10000.0,
51
+ "rope_type": "default"
52
+ }
53
  },
 
54
  "sliding_window": 1024,
55
+ "tie_word_embeddings": true,
56
  "use_bidirectional_attention": false,
57
  "use_cache": true,
58
  "vocab_size": 262208
59
  },
60
+ "tie_word_embeddings": true,
61
+ "transformers_version": "5.7.0.dev0",
62
  "vision_config": {
63
  "attention_dropout": 0.0,
64
  "embed_dim": 64,
65
  "hidden_act": "gelu_pytorch_tanh",
66
  "hidden_size": 16,
67
+ "image_size": 224,
68
  "intermediate_size": 4304,
69
  "layer_norm_eps": 1e-06,
70
  "model_type": "siglip_vision_model",
generation_config.json CHANGED
@@ -1,13 +1,13 @@
1
  {
 
2
  "bos_token_id": 2,
3
- "cache_implementation": "hybrid",
4
- "do_sample": true,
5
  "eos_token_id": [
6
  1,
7
  106
8
  ],
 
 
9
  "pad_token_id": 0,
10
- "top_k": 64,
11
- "top_p": 0.95,
12
- "transformers_version": "4.57.3"
13
  }
 
1
  {
2
+ "_from_model_config": true,
3
  "bos_token_id": 2,
 
 
4
  "eos_token_id": [
5
  1,
6
  106
7
  ],
8
+ "output_attentions": false,
9
+ "output_hidden_states": false,
10
  "pad_token_id": 0,
11
+ "transformers_version": "5.7.0.dev0",
12
+ "use_cache": true
 
13
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4631a121257799c3b9ced2e772035a5f0087bef1f383271434296f3197439a7c
3
- size 11287536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4290f604775b64587805e9a0552e14ca05a40437b23c3af350e641d0c857373
3
+ size 11164176