Upload tiny Cohere2ForCausalLM

#1
by qgallouedec HF Staff - opened
config.json CHANGED
@@ -5,33 +5,40 @@
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
8
- "bos_token_id": 5,
 
9
  "dtype": "bfloat16",
10
- "eos_token_id": 255001,
11
  "head_dim": 2,
12
  "hidden_act": "silu",
13
  "hidden_size": 8,
14
  "initializer_range": 0.02,
15
  "intermediate_size": 32,
16
  "layer_norm_eps": 1e-05,
 
17
  "layer_types": [
18
  "sliding_attention",
19
  "sliding_attention"
20
  ],
21
- "logit_scale": 0.0625,
22
- "max_position_embeddings": 8192,
23
  "model_type": "cohere2",
24
  "num_attention_heads": 4,
25
  "num_hidden_layers": 2,
26
  "num_key_value_heads": 2,
 
27
  "pad_token_id": 0,
28
- "rope_parameters": {
29
- "rope_theta": 10000.0,
30
- "rope_type": "default"
31
- },
32
  "sliding_window": 4096,
33
- "tie_word_embeddings": true,
34
- "transformers_version": "5.2.0.dev0",
35
  "use_cache": true,
36
- "vocab_size": 261010
 
 
 
 
 
37
  }
 
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
8
+ "bos_token_id": 2,
9
+ "cache_implementation": "hybrid",
10
  "dtype": "bfloat16",
11
+ "eos_token_id": 3,
12
  "head_dim": 2,
13
  "hidden_act": "silu",
14
  "hidden_size": 8,
15
  "initializer_range": 0.02,
16
  "intermediate_size": 32,
17
  "layer_norm_eps": 1e-05,
18
+ "layer_switch": 4,
19
  "layer_types": [
20
  "sliding_attention",
21
  "sliding_attention"
22
  ],
23
+ "logit_scale": 1.0,
24
+ "max_position_embeddings": 500000,
25
  "model_type": "cohere2",
26
  "num_attention_heads": 4,
27
  "num_hidden_layers": 2,
28
  "num_key_value_heads": 2,
29
+ "order_of_interleaved_layers": "local_attn_first",
30
  "pad_token_id": 0,
31
+ "position_embedding_type": "rope_gptj",
32
+ "rope_scaling": null,
33
+ "rope_theta": 50000,
34
+ "rotary_pct": 1.0,
35
  "sliding_window": 4096,
36
+ "transformers_version": "4.56.2",
 
37
  "use_cache": true,
38
+ "use_embedding_sharing": true,
39
+ "use_gated_activation": true,
40
+ "use_parallel_block": true,
41
+ "use_parallel_embedding": false,
42
+ "use_qk_norm": false,
43
+ "vocab_size": 262144
44
  }
generation_config.json CHANGED
@@ -3,5 +3,5 @@
3
  "bos_token_id": 2,
4
  "eos_token_id": 3,
5
  "pad_token_id": 0,
6
- "transformers_version": "5.2.0.dev0"
7
  }
 
3
  "bos_token_id": 2,
4
  "eos_token_id": 3,
5
  "pad_token_id": 0,
6
+ "transformers_version": "4.56.2"
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c48a02ad4c440b11d5d77902b18899e2c2effbf3c91c4674d3c2bc7dcdc92c6
3
- size 4181920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:010b86adeb01e8dcb7d25d91e072bf4ca270a96f4ecc746b08d650b12b12da63
3
+ size 4200064
special_tokens_map.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|START_RESPONSE|>",
4
+ "<|END_RESPONSE|>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<BOS_TOKEN>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<|END_OF_TURN_TOKEN|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": {
21
+ "content": "<PAD>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "unk_token": {
28
+ "content": "<UNK>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84d150b8af762b3662bdadc1fbc8274bc535ef86c0d497d0a40469fe86d92368
3
- size 21376340
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2227ea9c52e8afb3f98bfed2679008b275f2664de69dfde174b374389eb0225d
3
+ size 21376527
tokenizer_config.json CHANGED
@@ -1,20 +1,209 @@
1
  {
 
 
2
  "add_prefix_space": false,
3
- "backend": "tokenizers",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "bos_token": "<BOS_TOKEN>",
5
  "clean_up_tokenization_spaces": false,
6
- "cls_token": "<CLS>",
7
  "eos_token": "<|END_OF_TURN_TOKEN|>",
8
- "errors": "replace",
9
- "is_local": false,
10
  "legacy": true,
11
- "mask_token": "<MASK_TOKEN>",
12
  "model_max_length": 1000000000000000019884624838656,
13
  "pad_token": "<PAD>",
14
- "sep_token": "<SEP>",
15
  "sp_model_kwargs": {},
16
  "spaces_between_special_tokens": false,
17
  "tokenizer_class": "CohereTokenizer",
18
  "unk_token": "<UNK>",
19
- "use_default_system_prompt": false
 
20
  }
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
  "add_prefix_space": false,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<PAD>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<MASK_TOKEN>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "<BOS_TOKEN>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "3": {
31
+ "content": "<EOS_TOKEN>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "4": {
39
+ "content": "<UNK>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": true
45
+ },
46
+ "5": {
47
+ "content": "<|START_OF_TURN_TOKEN|>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": false,
51
+ "single_word": false,
52
+ "special": true
53
+ },
54
+ "6": {
55
+ "content": "<|END_OF_TURN_TOKEN|>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": false,
59
+ "single_word": false,
60
+ "special": true
61
+ },
62
+ "7": {
63
+ "content": "<|USER_TOKEN|>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": false,
67
+ "single_word": false,
68
+ "special": true
69
+ },
70
+ "8": {
71
+ "content": "<|CHATBOT_TOKEN|>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": false,
75
+ "single_word": false,
76
+ "special": true
77
+ },
78
+ "9": {
79
+ "content": "<|SYSTEM_TOKEN|>",
80
+ "lstrip": false,
81
+ "normalized": false,
82
+ "rstrip": false,
83
+ "single_word": false,
84
+ "special": true
85
+ },
86
+ "10": {
87
+ "content": "<|NEW_FILE|>",
88
+ "lstrip": false,
89
+ "normalized": false,
90
+ "rstrip": false,
91
+ "single_word": false,
92
+ "special": true
93
+ },
94
+ "11": {
95
+ "content": "<|BEGINNING_OF_PREFIX_FIM_TOKEN|>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false,
100
+ "special": true
101
+ },
102
+ "12": {
103
+ "content": "<|BEGINNING_OF_MIDDLE_FIM_TOKEN|>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": false,
107
+ "single_word": false,
108
+ "special": true
109
+ },
110
+ "13": {
111
+ "content": "<|BEGINNING_OF_SUFFIX_FIM_TOKEN|>",
112
+ "lstrip": false,
113
+ "normalized": false,
114
+ "rstrip": false,
115
+ "single_word": false,
116
+ "special": true
117
+ },
118
+ "14": {
119
+ "content": "<|END_OF_MIDDLE_FIM_TOKEN|>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false,
124
+ "special": true
125
+ },
126
+ "261000": {
127
+ "content": "<|START_RESPONSE|>",
128
+ "lstrip": false,
129
+ "normalized": false,
130
+ "rstrip": false,
131
+ "single_word": false,
132
+ "special": true
133
+ },
134
+ "261001": {
135
+ "content": "<|END_RESPONSE|>",
136
+ "lstrip": false,
137
+ "normalized": false,
138
+ "rstrip": false,
139
+ "single_word": false,
140
+ "special": true
141
+ },
142
+ "261002": {
143
+ "content": "<|START_ACTION|>",
144
+ "lstrip": false,
145
+ "normalized": false,
146
+ "rstrip": false,
147
+ "single_word": false,
148
+ "special": true
149
+ },
150
+ "261003": {
151
+ "content": "<|END_ACTION|>",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false,
156
+ "special": true
157
+ },
158
+ "261004": {
159
+ "content": "<|START_TOOL_RESULT|>",
160
+ "lstrip": false,
161
+ "normalized": false,
162
+ "rstrip": false,
163
+ "single_word": false,
164
+ "special": true
165
+ },
166
+ "261005": {
167
+ "content": "<|END_TOOL_RESULT|>",
168
+ "lstrip": false,
169
+ "normalized": false,
170
+ "rstrip": false,
171
+ "single_word": false,
172
+ "special": true
173
+ },
174
+ "261006": {
175
+ "content": "<|START_THINKING|>",
176
+ "lstrip": false,
177
+ "normalized": false,
178
+ "rstrip": false,
179
+ "single_word": false,
180
+ "special": true
181
+ },
182
+ "261007": {
183
+ "content": "<|END_THINKING|>",
184
+ "lstrip": false,
185
+ "normalized": false,
186
+ "rstrip": false,
187
+ "single_word": false,
188
+ "special": true
189
+ }
190
+ },
191
+ "additional_special_tokens": [
192
+ "<|START_RESPONSE|>",
193
+ "<|END_RESPONSE|>"
194
+ ],
195
  "bos_token": "<BOS_TOKEN>",
196
  "clean_up_tokenization_spaces": false,
 
197
  "eos_token": "<|END_OF_TURN_TOKEN|>",
198
+ "extra_special_tokens": {},
 
199
  "legacy": true,
200
+ "merges_file": null,
201
  "model_max_length": 1000000000000000019884624838656,
202
  "pad_token": "<PAD>",
 
203
  "sp_model_kwargs": {},
204
  "spaces_between_special_tokens": false,
205
  "tokenizer_class": "CohereTokenizer",
206
  "unk_token": "<UNK>",
207
+ "use_default_system_prompt": false,
208
+ "vocab_file": null
209
  }