SlitherCode committed on
Commit
99b5878
·
verified ·
1 Parent(s): f5605f0

TiktokenTokenizer: upload tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +25 -18
tokenizer_config.json CHANGED
@@ -1,19 +1,26 @@
1
  {
2
- "add_prefix_space": false,
3
- "backend": "tokenizers",
4
- "bos_token": "éĤĢ",
5
- "clean_up_tokenization_spaces": false,
6
- "eos_token": "éĤĢ",
7
- "errors": "replace",
8
- "extra_special_tokens": [
9
- "<|im_start|>",
10
- "<|im_end|>"
11
- ],
12
- "is_local": false,
13
- "local_files_only": false,
14
- "model_max_length": 32768,
15
- "pad_token": "éĤĢ",
16
- "split_special_tokens": false,
17
- "tokenizer_class": "Qwen2Tokenizer",
18
- "unk_token": null
19
- }
 
 
 
 
 
 
 
 
1
  {
2
+ "added_tokens_decoder": {
3
+ "100257": {
4
+ "content": "<|endoftext|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ }
11
+ },
12
+ "backend": "custom",
13
+ "bos_token": "<|endoftext|>",
14
+ "encoding_name": "cl100k_base",
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1000000000000000019884624838656,
17
+ "pad_token": "<|endoftext|>",
18
+ "tokenizer_class": "TiktokenTokenizer",
19
+ "unk_token": "<|endoftext|>",
20
+ "auto_map": {
21
+ "AutoTokenizer": [
22
+ null,
23
+ "tokenization_parchment.TiktokenTokenizer"
24
+ ]
25
+ }
26
+ }