Greytechai OwenArli commited on
Commit
d9b0e88
·
0 Parent(s):

Duplicate from ArliAI/DS-R1-Distill-70B-ArliAI-RpR-v4-Large

Browse files

Co-authored-by: Owen Arliawan <OwenArli@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
R1-RpR-v4-Master.json ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "instruct": {
3
+ "input_sequence": "<|User|>",
4
+ "output_sequence": "<|Assistant|>",
5
+ "last_output_sequence": "",
6
+ "system_sequence": "",
7
+ "stop_sequence": "",
8
+ "wrap": false,
9
+ "macro": true,
10
+ "names_behavior": "none",
11
+ "activation_regex": "",
12
+ "system_sequence_prefix": "",
13
+ "system_sequence_suffix": "",
14
+ "first_output_sequence": "",
15
+ "skip_examples": false,
16
+ "output_suffix": "<|end▁of▁sentence|>",
17
+ "input_suffix": "",
18
+ "system_suffix": "",
19
+ "user_alignment_message": "",
20
+ "system_same_as_user": true,
21
+ "last_system_sequence": "",
22
+ "first_input_sequence": "",
23
+ "last_input_sequence": "",
24
+ "names_force_groups": true,
25
+ "name": "DeepSeek-V2.5"
26
+ },
27
+ "context": {
28
+ "story_string": "{{#if system}}{{system}}\n{{/if}}{{#if wiBefore}}{{wiBefore}}\n{{/if}}{{#if description}}{{description}}\n{{/if}}{{#if personality}}{{char}}'s personality: {{personality}}\n{{/if}}{{#if scenario}}Scenario: {{scenario}}\n{{/if}}{{#if wiAfter}}{{wiAfter}}\n{{/if}}{{#if persona}}{{persona}}\n{{/if}}{{trim}}\n",
29
+ "example_separator": "",
30
+ "chat_start": "",
31
+ "use_stop_strings": false,
32
+ "names_as_stop_strings": true,
33
+ "always_force_name2": false,
34
+ "trim_sentences": false,
35
+ "single_line": false,
36
+ "name": "DeepSeek-V2.5"
37
+ },
38
+ "sysprompt": {
39
+ "name": "Blank-RpR",
40
+ "content": "",
41
+ "post_history": ""
42
+ },
43
+ "preset": {
44
+ "temp": 1,
45
+ "temperature_last": true,
46
+ "top_p": 1,
47
+ "top_k": 40,
48
+ "top_a": 0,
49
+ "tfs": 1,
50
+ "epsilon_cutoff": 0,
51
+ "eta_cutoff": 0,
52
+ "typical_p": 1,
53
+ "min_p": 0.02,
54
+ "rep_pen": 1,
55
+ "rep_pen_range": 0,
56
+ "rep_pen_decay": 0,
57
+ "rep_pen_slope": 0,
58
+ "no_repeat_ngram_size": 0,
59
+ "penalty_alpha": 0,
60
+ "num_beams": 1,
61
+ "length_penalty": 1,
62
+ "min_length": 0,
63
+ "encoder_rep_pen": 1,
64
+ "freq_pen": 0,
65
+ "presence_pen": 0,
66
+ "skew": 0,
67
+ "do_sample": true,
68
+ "early_stopping": false,
69
+ "dynatemp": false,
70
+ "min_temp": 0.5,
71
+ "max_temp": 3,
72
+ "dynatemp_exponent": 5.77,
73
+ "smoothing_factor": 0,
74
+ "smoothing_curve": 1,
75
+ "dry_allowed_length": 4,
76
+ "dry_multiplier": 0,
77
+ "dry_base": 1.75,
78
+ "dry_sequence_breakers": "[\"\\n\", \":\", \"*\"]",
79
+ "dry_penalty_last_n": 8192,
80
+ "add_bos_token": true,
81
+ "ban_eos_token": false,
82
+ "skip_special_tokens": false,
83
+ "mirostat_mode": 0,
84
+ "mirostat_tau": 5,
85
+ "mirostat_eta": 0.1,
86
+ "guidance_scale": 1,
87
+ "negative_prompt": "",
88
+ "grammar_string": "",
89
+ "json_schema": {},
90
+ "banned_tokens": "",
91
+ "sampler_priority": [
92
+ "repetition_penalty",
93
+ "presence_penalty",
94
+ "frequency_penalty",
95
+ "dry",
96
+ "dynamic_temperature",
97
+ "top_p",
98
+ "top_k",
99
+ "typical_p",
100
+ "epsilon_cutoff",
101
+ "eta_cutoff",
102
+ "tfs",
103
+ "top_a",
104
+ "mirostat",
105
+ "min_p",
106
+ "quadratic_sampling",
107
+ "temperature",
108
+ "xtc",
109
+ "encoder_repetition_penalty",
110
+ "no_repeat_ngram"
111
+ ],
112
+ "samplers": [
113
+ "penalties",
114
+ "dry",
115
+ "top_n_sigma",
116
+ "top_k",
117
+ "typ_p",
118
+ "tfs_z",
119
+ "typical_p",
120
+ "top_p",
121
+ "min_p",
122
+ "xtc",
123
+ "temperature"
124
+ ],
125
+ "samplers_priorities": [
126
+ "dry",
127
+ "penalties",
128
+ "no_repeat_ngram",
129
+ "temperature",
130
+ "top_nsigma",
131
+ "top_p_top_k",
132
+ "top_a",
133
+ "min_p",
134
+ "tfs",
135
+ "eta_cutoff",
136
+ "epsilon_cutoff",
137
+ "typical_p",
138
+ "quadratic",
139
+ "xtc"
140
+ ],
141
+ "ignore_eos_token": false,
142
+ "spaces_between_special_tokens": false,
143
+ "speculative_ngram": false,
144
+ "sampler_order": [
145
+ 6,
146
+ 0,
147
+ 1,
148
+ 3,
149
+ 4,
150
+ 2,
151
+ 5
152
+ ],
153
+ "logit_bias": [],
154
+ "xtc_threshold": 0,
155
+ "xtc_probability": 0,
156
+ "nsigma": 0,
157
+ "min_keep": 0,
158
+ "rep_pen_size": 0,
159
+ "genamt": 4096,
160
+ "max_length": 16384,
161
+ "name": "RpR"
162
+ },
163
+ "reasoning": {
164
+ "name": "DeepSeek",
165
+ "prefix": "<think>",
166
+ "suffix": "</think>",
167
+ "separator": "\n\n"
168
+ }
169
+ }
README.md ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: llama3.3
3
+ thumbnail: https://cdn-uploads.huggingface.co/production/uploads/6625f4a8a8d1362ebcc3851a/hIZ2ZcaDyfYLT9Yd4pfOs.jpeg
4
+ language:
5
+ - en
6
+ base_model:
7
+ - deepseek-ai/DeepSeek-R1-Distill-Llama-70B
8
+ library_name: transformers
9
+ pipeline_tag: text-generation
10
+ ---
11
+ # DS-R1-Distill-70B-ArliAI-RpR-v4-Large
12
+
13
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/6625f4a8a8d1362ebcc3851a/hIZ2ZcaDyfYLT9Yd4pfOs.jpeg" alt="clickbait" width="500">
14
+
15
+ <small>Image generated using Arli AI Image Generation https://www.arliai.com/image-generation</small>
16
+
17
+ # Different RpR Versions
18
+
19
+ [Small - 8B](https://huggingface.co/ArliAI/DS-R1-Qwen3-8B-ArliAI-RpR-v4-Small) | [Fast - 30B-A3B](https://huggingface.co/ArliAI/Qwen3-30B-A3B-ArliAI-RpR-v4-Fast) | [OG - 32B](https://huggingface.co/ArliAI/QwQ-32B-ArliAI-RpR-v4) | [Large - 70B](https://huggingface.co/ArliAI/DS-R1-Distill-70B-ArliAI-RpR-v4-Large)
20
+
21
+ ## RpR v4 Changes:
22
+
23
+ The best RP/creative model series from ArliAI yet again. This time it is based on deepseek-ai/DeepSeek-R1-Distill-Llama-70B, for the largest activated parameter count of any RpR model yet.
24
+
25
+ - Reduced repetitions and impersonation
26
+
27
+ To add to the creativity and out-of-the-box thinking of RpR v3, a more advanced filtering method was used in order to remove examples where the LLM repeated similar phrases or talked for the user. Any repetition or impersonation cases that happen will be due to how the base model was trained, and not because of the RpR dataset.
28
+
29
+ - Increased training sequence length
30
+
31
+ The training sequence length was increased to 16K in order to help awareness and memory even on longer chats.
32
+
33
+ ## RpR Series Overview: Building on RPMax with Reasoning
34
+
35
+ RpR (RolePlay with Reasoning) is a new series of models from ArliAI. This series **builds directly upon the successful dataset curation methodology and training methods developed for the RPMax series**.
36
+
37
+ RpR models use the same curated, deduplicated RP and creative writing dataset used for RPMax, with a focus on variety to ensure high creativity and minimize cross-context repetition. Users familiar with RPMax will recognize the unique, non-repetitive writing style unlike other finetuned-for-RP models.
38
+
39
+ With the release of QwQ as the first high performing open-source reasoning model that can be easily trained, it was clear that the available instruct and creative writing reasoning datasets contain only one response per example. This type of single-response dataset, when used for training reasoning models, causes degraded output quality in long multi-turn chats, which is why Arli AI decided to create a real RP model capable of long multi-turn chat with reasoning.
40
+
41
+ In order to create RpR, we first had to actually create the reasoning RP dataset by re-processing our existing known-good RPMax dataset into a reasoning dataset. This was possible by using the base QwQ Instruct model itself to create the reasoning process for every turn in the RPMax dataset conversation examples, which is then further refined in order to make sure the reasoning is in-line with the actual response examples from the dataset.
42
+
43
+ Another important thing to get right is to make sure the model is trained on examples that present reasoning blocks in the same way as it encounters them during inference: that is, never seeing the reasoning blocks in its context. In order to do this, the training run was completed using axolotl with a manual template-free segments dataset in order to make sure that the model is never trained to see the reasoning block in the context, just like how the model will be used during inference time.
44
+
45
+ The result of training on this dataset with this method is consistently coherent and interesting outputs even in long multi-turn RP chats. This is, as far as we know, the first true correctly-trained reasoning model trained for RP and creative writing.
46
+
47
+ You can access the model at https://arliai.com and we also have a models ranking page at https://www.arliai.com/models-ranking
48
+
49
+ Ask questions in our new Discord Server https://discord.com/invite/t75KbPgwhk or on our subreddit https://www.reddit.com/r/ArliAI/
50
+
51
+ ## Model Description
52
+
53
+ DS-R1-Distill-70B-ArliAI-RpR-v4-Large is part of the RpR v4 series. It is a 70-billion parameter model fine-tuned using the RpR dataset based on the curated RPMax dataset combined with techniques to maintain reasoning abilities in long multi-turn chats.
54
+
55
+ ### Recommended Samplers
56
+
57
+ - RpR models do not work well with repetition-penalty types of samplers, even more advanced ones such as XTC or DRY.
58
+ - It works best with simple sampler settings and also being allowed to reason for a long time (high max tokens).
59
+ - You can download the ST master export uploaded in the files section of this repo as well.
60
+
61
+ Recommended to first start with:
62
+
63
+ * **Temperature**: 1.0
64
+ * **MinP**: 0.02
65
+ * **TopK**: 40
66
+ * **Response Tokens**: 2048+
67
+
68
+ ### Specs
69
+
70
+ * **Base Model**: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
71
+ * **Max Context Length**: 32K
72
+ * **Parameters**: 70B
73
+ * **Reasoning Model**: Yes
74
+
75
+ ### Training Details
76
+
77
+ * **Sequence Length**: 4096
78
+ * **Epochs**: 1 epoch training (Inherited from RPMax methods)
79
+ * **Fine-tuning Method**: RS-QLORA (Rank-Stabilized LoRA)
80
+ * **Rank/Alpha**: 64-rank 64-alpha
81
+ * **Learning Rate**: 0.00001
82
+ * **Scheduler**: Constant
83
+ * **Gradient accumulation**: 32
84
+
85
+ ### Very Nice Training graphs :)
86
+
87
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/6625f4a8a8d1362ebcc3851a/J-cD7mjdIG58BsSPpuS6x.png" alt="Train Loss" width="600">
88
+
89
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/6625f4a8a8d1362ebcc3851a/T890dqrUcBYnlOzK7MXrU.png" alt="Eval Loss" width="600">
90
+
91
+ ### Quantization
92
+
93
+ * **BF16**: https://huggingface.co/ArliAI/DS-R1-Distill-70B-ArliAI-RpR-v4-Large
94
+ * **GGUF**: https://huggingface.co/ArliAI/DS-R1-Distill-70B-ArliAI-RpR-v4-Large-GGUF
95
+
96
+ ### How to use reasoning models correctly in ST
97
+
98
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/6625f4a8a8d1362ebcc3851a/njVt2Vir8Isd3ApjTBmoI.png" alt="RpR ST Settings" width="600">
99
+
100
+ For any reasoning models in general, you need to make sure to set:
101
+
102
+ * Prefix is set to ONLY \<think> and the suffix is set to ONLY \</think> without any spaces or newlines (enter)
103
+
104
+ * Reply starts with \<think>
105
+
106
+ * Always add character names is unchecked
107
+
108
+ * Include names is set to never
109
+
110
+ * As always the chat template should also conform to the model being used
111
+
112
+ Note: Reasoning models work properly only if include names is set to never, since they always expect the eos token of the user turn followed by the \<think> token in order to start reasoning before outputting their response. If you set include names to enabled, then it will always append the character name at the end like "Seraphina:\<eos_token>" which confuses the model on whether it should respond or reason first.
113
+
114
+ The rest of your sampler parameters can be set as you wish as usual.
115
+
116
+ If you don't see the reasoning wrapped inside the thinking block, then either your settings are still wrong and don't follow my example, or your ST version is too old and lacks reasoning block auto parsing.
117
+
118
+ If you see the whole response is in the reasoning block, then your \<think> and \</think> reasoning token suffix and prefix might have an extra space or newline. Or the model just isn't a reasoning model that is smart enough to always put reasoning in between those tokens.
119
+
120
+ ### If you set everything up correctly, it should look like this:
121
+
122
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/6625f4a8a8d1362ebcc3851a/wFQC8Df9dLaiQGnIg_iEo.png" alt="RpR example response" width="600">
123
+
124
+ ---
125
+
126
+ <details>
127
+ <summary>Details: The RPMax Foundation (Dataset & Training Philosophy)</summary>
128
+
129
+ *The following sections detail the core philosophy behind the dataset and training methodology originally developed for RPMax, which serves as the foundation for the RpR series.*
130
+
131
+ ### The Goal: Reduced Repetition and Higher Creativity
132
+
133
+ The goal of the dataset curation used for both RPMax and RpR is to reduce repetitions and increase the model's ability to creatively write in different situations presented to it. What this means is it is a model that will output responses very differently without falling into predictable tropes across different situations.
134
+
135
+ ### What is repetition and creativity?
136
+
137
+ First of all, creativity should mean the variety in output that the model is capable of creating. You should not confuse creativity with writing prose. When a model writes in a way that can be said to be pleasant like writers would write in a novel, this is not creative writing. This is just a model having a certain pleasant type of writing prose. So a model that writes nicely is not necessarily a creative model.
138
+
139
+ Repetition and creativity are essentially intertwined with each other, so if a model is repetitive then a model can also be said to be un-creative as it cannot write new things and can only repeat similar responses that it has created before. For repetition there are actually two very different forms of repetition.
140
+
141
+ **In-context repetition:** When people mention a model is repetitive, this usually means a model that likes to repeat the same phrases in a single conversation. An example of this is when a model says that a character "flicks her hair and...." and then starts to prepend that "flicks her hair and..." into every other action that character does.
142
+
143
+ It can be said that the model is boring, but even in real people's writing it is possible that this kind of repetition could be intentional to subtly prove a point or showcase a character's traits in some scenarios. So this type of repetition is not always bad, and completely discouraging a model from doing this does not always lead to improving a model's writing ability.
144
+
145
+ In this regard, RPMax and RpR are not yet focused on eliminating this type of repetition, so there might be some in-context repetition that can be seen in the outputs. Eliminating this will be the next big step of the RPMax and RpR series of models.
146
+
147
+ **Cross-context repetition:** A second worse type of repetition is a model's tendency to repeat the same phrases or tropes in very different situations. An example is a model that likes to repeat the infamous "shivers down my spine" phrase in wildly different conversations that don't necessarily fit with that phrase.
148
+
149
+ This type of repetition is ALWAYS bad as it is a sign that the model has over-fitted into that style of "creative writing" that it has often seen in the training dataset. A model's tendency to have cross-context repetition is also usually visible in how a model likes to choose similar repetitive names when writing stories. Such as the infamous "elara" and "whispering woods" names.
150
+
151
+ The primary goal of the dataset curation for RPMax and RpR is to create a highly creative model by reducing cross-context repetition, as that is the type of repetition that follows you through different conversations. This is combated by making sure the dataset does not have repetitions of the same situations or characters in different example entries.
152
+
153
+ ### Dataset Curation
154
+
155
+ The success of models trained on this dataset (including RPMax and now RpR) is thanks to the training method and the unique dataset created for fine-tuning. It contains as many open source creative writing and RP datasets as could be found (all from Hugging Face), which have been curated to weed out datasets that are purely synthetic generations, as they often only serve to dumb down the model and make the model learn GPT-isms (slop) rather than help.
156
+
157
+ Then Llama 3.1 8B (or a similarly capable model) is used to create a database of the characters and situations that are portrayed in these datasets, which is then used to de-dupe these datasets to make sure that there is only a single entry of any character or situation.
158
+
159
+ ### The Golden Rule of Fine-Tuning
160
+
161
+ Unlike the initial pre-training stage where the more data you throw at it the better it becomes for the most part, the golden rule for fine-tuning models isn't quantity, but instead quality over quantity. So the dataset used here is actually orders of magnitude smaller than it would be if it included repeated characters and situations in the dataset, but the end result is a model that does not feel like just another "in-breed" of another creative writing/RP model.
162
+
163
+ ### Training Parameters and Unconventional Approach
164
+
165
+ The usual way is to have a low learning rate and high gradient accumulation for better loss stability, and then run multiple epochs of the training run until the loss is acceptable.
166
+
167
+ The RPMax and RpR methodology, however, uses only **one single epoch**, a low gradient accumulation, and a higher than normal learning rate. The loss curve during training is actually unstable and jumps up and down a lot, but if it is smoothed out, it is steadily decreasing over time. The theory is that this allows the models to learn from each individual example in the dataset much more, and by not showing the model the same example twice using multiple epochs, it stops the model from latching on and reinforcing a single character or story trope.
168
+
169
+ The jumping up and down of loss during training is because as the model gets trained on a new entry from the dataset, the model will have never seen a similar example before and therefore can't really predict an answer similar to the example entry. Meanwhile, the relatively high end loss of 1.0 or slightly above is actually acceptable because the goal was never to create a model that can output exactly like the dataset that is being used to train it, but rather to create a model that is creative enough to make up its own style of responses.
170
+
171
+ This is different from training a model in a particular domain and needing the model to reliably be able to output like the example dataset, such as when training a model on a company's internal knowledge base.
172
+
173
+ </details>
174
+
175
+ ---
176
+
177
+ ## Try It Out!
178
+
179
+ Model preference is subjective, so please do try this model for yourself. Your feedback, both good and bad, is always valuable and will help us improve the future RPMax and RpR models.
config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 128000,
8
+ "eos_token_id": 128001,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 8192,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 28672,
14
+ "max_position_embeddings": 131072,
15
+ "mlp_bias": false,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 64,
18
+ "num_hidden_layers": 80,
19
+ "num_key_value_heads": 8,
20
+ "pretraining_tp": 1,
21
+ "rms_norm_eps": 1e-05,
22
+ "rope_scaling": {
23
+ "factor": 8.0,
24
+ "high_freq_factor": 4.0,
25
+ "low_freq_factor": 1.0,
26
+ "original_max_position_embeddings": 8192,
27
+ "rope_type": "llama3"
28
+ },
29
+ "rope_theta": 500000.0,
30
+ "tie_word_embeddings": false,
31
+ "torch_dtype": "bfloat16",
32
+ "transformers_version": "4.51.3",
33
+ "use_cache": true,
34
+ "vocab_size": 128256
35
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
+ "do_sample": true,
5
+ "eos_token_id": 128001,
6
+ "temperature": 0.6,
7
+ "top_p": 0.95,
8
+ "transformers_version": "4.51.3"
9
+ }
model-00001-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bebb29bafcc35493080c4b61ecb1c0a059d5bdcbe2c6ed62d1fab460be0ca60
3
+ size 4584408808
model-00002-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e466c1d888efddff7739b076b074532e88759b4d117d4ae34691ca288f18680a
3
+ size 4664167376
model-00003-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d9d2b26fafbccd1c5b63b80eb4440cc6904209b19cc65072e9a54a69cf8602b
3
+ size 4999711704
model-00004-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b45747f55811295b480901402e18ca9c2105da274f54f7ec5444659653ec17f8
3
+ size 4966157032
model-00005-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0e84455b1440a43ca7bdbf20060d5963e06918da32386a8699ad6d240e6bbba
3
+ size 4664134408
model-00006-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d90c373ca710ff80619d54a694856410911f1b3facc40d4a64aaf56e3eaf7a7c
3
+ size 4664167408
model-00007-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7fdf708d5e3e9d7c8fc97a0ea8fa5291b77e55e803d7964b5bc1895822f677c
3
+ size 4664167408
model-00008-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b10db6b7e50e21539485c126414a5aa256ca4d537aa1616d4d03a72cec522958
3
+ size 4999711728
model-00009-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:251a27c154451faf7cfe7c31f02c3004878cf904529e0c0aabad286bae7edf2f
3
+ size 4966157056
model-00010-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec4a6cdb1e0e8b9d728ba4cf7316924965df47fabb882e0b7b6ceaad745a9c45
3
+ size 4664134408
model-00011-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:307f5099fa987d730fbcf9a7fe6bae9208b653125451676f1b1b6a08d085b56f
3
+ size 4664167408
model-00012-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:960d08e50e199bb332b74cd93e89a9a914fd9e0c533b6ca62f913ab9012801f9
3
+ size 4664167408
model-00013-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:731ab1a51c89b4218e36c39c7cf948d42f8cc5b10a1c2514c3d28e159ddfabed
3
+ size 4999711728
model-00014-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5747a48af530a1974744321aef1abda74bfc106b14d47456a6c8de7f4ac11d97
3
+ size 4966157056
model-00015-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:758e9cd578d32a5e0375ad03d2726d040291a0c836b5da501d6a790fd11a55cb
3
+ size 4664134408
model-00016-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71a005b6a00107d265ca290c77db71e998aa78b54b9da651ccfa9eec812c8347
3
+ size 4664167408
model-00017-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0198f780695843572477eed95a6dc0822dd0f1c5703039bc58735272e53d4ad
3
+ size 4664167408
model-00018-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:295cc4c8293ede9980af6ff24a002fc32957f6d7eda273d2add3bb1294d78eb9
3
+ size 4999711728
model-00019-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:602e0a449686cf12188221b7dd8bcb751aae77a3a4b3fcb55f7855fc0e3496d5
3
+ size 4966157056
model-00020-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ac49134d0861c0800cf32c9307ecc174a0fd706f81bae88904d81e943319cb4
3
+ size 4664134408
model-00021-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2a36869d32464ead31c1b6c7df507e3f17bbefb061a303d0ee53d4f319f4d38
3
+ size 4664167408
model-00022-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04516bab8b0f8cffe11bc3f9b091e4e8c25bc856220d13b1cadcddd20e3182ad
3
+ size 4664167408
model-00023-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5bdf63fb14de264f9fee74d8c177558efc6175153ebf0316c180d3c83d035ca
3
+ size 4999711728
model-00024-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92e027e267af8cb8294f4dfcb0255886e8631e2d8609ced77e8fe4800c7be7c3
3
+ size 4966157056
model-00025-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6990157c6380a23b2ba3c1d8f757976b83317cf25e40fcb68aa619818d744d55
3
+ size 4664134408
model-00026-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c10829ec45f8e71a127186e2ed2485246fa220284aecd3bfe9dc97b409a27501
3
+ size 4664167408
model-00027-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:425613f04a6b62534efe0bd4c1318981b31ea8b331769ad54b6391423c3010f7
3
+ size 4664167408
model-00028-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e66c663d6285b0aae0c32bcf34c7e424b7c1e3f8709bf16e41b3680c060588b6
3
+ size 4999711728
model-00029-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04b84a14f604bbddb0aca3cffa236fcdf2368307b24b7e25514db146a16afa83
3
+ size 4966173536
model-00030-of-00030.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee0b056e97c867d5282cad7b62f782b41dc7cac7da2df623215ad8b85f611634
3
+ size 2101346432
model.safetensors.index.json ADDED
@@ -0,0 +1,730 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 141107412992
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00030-of-00030.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00030.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00030.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00030.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00030.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00030.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00030.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00030.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00030.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00030.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00030.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00002-of-00030.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00002-of-00030.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00030.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00002-of-00030.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00030.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00030.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00030.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00030.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00030.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00005-of-00030.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00005-of-00030.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00005-of-00030.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00005-of-00030.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00005-of-00030.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00005-of-00030.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00005-of-00030.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00005-of-00030.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00005-of-00030.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00005-of-00030.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00005-of-00030.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00005-of-00030.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00005-of-00030.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00005-of-00030.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00005-of-00030.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00005-of-00030.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00005-of-00030.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00005-of-00030.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00006-of-00030.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00006-of-00030.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00005-of-00030.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00005-of-00030.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00006-of-00030.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00005-of-00030.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00005-of-00030.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00005-of-00030.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00005-of-00030.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00006-of-00030.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00006-of-00030.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00006-of-00030.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00006-of-00030.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00006-of-00030.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00006-of-00030.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00006-of-00030.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00006-of-00030.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00006-of-00030.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00006-of-00030.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00006-of-00030.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00006-of-00030.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00006-of-00030.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00006-of-00030.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00006-of-00030.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00006-of-00030.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00006-of-00030.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00006-of-00030.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00007-of-00030.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00007-of-00030.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00006-of-00030.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00007-of-00030.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00007-of-00030.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00006-of-00030.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00006-of-00030.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00006-of-00030.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00006-of-00030.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00007-of-00030.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00007-of-00030.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00007-of-00030.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00007-of-00030.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00007-of-00030.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00007-of-00030.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00007-of-00030.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00007-of-00030.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00007-of-00030.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00007-of-00030.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00007-of-00030.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00007-of-00030.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00007-of-00030.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00007-of-00030.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00007-of-00030.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00007-of-00030.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00007-of-00030.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00007-of-00030.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00008-of-00030.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00008-of-00030.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00008-of-00030.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00008-of-00030.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00008-of-00030.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00007-of-00030.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00007-of-00030.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00007-of-00030.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00007-of-00030.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00008-of-00030.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00008-of-00030.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00008-of-00030.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00008-of-00030.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00008-of-00030.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00008-of-00030.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00008-of-00030.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00008-of-00030.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00008-of-00030.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00002-of-00030.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00002-of-00030.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00030.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00002-of-00030.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00030.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00030.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00030.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00030.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00030.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00008-of-00030.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00008-of-00030.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00008-of-00030.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00008-of-00030.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00008-of-00030.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00008-of-00030.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00008-of-00030.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00008-of-00030.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00008-of-00030.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00009-of-00030.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00009-of-00030.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00009-of-00030.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00009-of-00030.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00009-of-00030.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00008-of-00030.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00009-of-00030.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00008-of-00030.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00008-of-00030.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00009-of-00030.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00009-of-00030.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00009-of-00030.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00009-of-00030.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00009-of-00030.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00009-of-00030.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00009-of-00030.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00009-of-00030.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00009-of-00030.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00009-of-00030.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00009-of-00030.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00009-of-00030.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00009-of-00030.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00009-of-00030.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00009-of-00030.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00009-of-00030.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00009-of-00030.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00009-of-00030.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00010-of-00030.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00010-of-00030.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00010-of-00030.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00010-of-00030.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00010-of-00030.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00010-of-00030.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00010-of-00030.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00010-of-00030.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00010-of-00030.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00010-of-00030.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00010-of-00030.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00010-of-00030.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00010-of-00030.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00010-of-00030.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00010-of-00030.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00010-of-00030.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00010-of-00030.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00010-of-00030.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00011-of-00030.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00011-of-00030.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00010-of-00030.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00010-of-00030.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00011-of-00030.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00010-of-00030.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00010-of-00030.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00010-of-00030.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00010-of-00030.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00011-of-00030.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00011-of-00030.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00011-of-00030.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00011-of-00030.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00011-of-00030.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00011-of-00030.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00011-of-00030.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00011-of-00030.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00011-of-00030.safetensors",
197
+ "model.layers.28.input_layernorm.weight": "model-00011-of-00030.safetensors",
198
+ "model.layers.28.mlp.down_proj.weight": "model-00011-of-00030.safetensors",
199
+ "model.layers.28.mlp.gate_proj.weight": "model-00011-of-00030.safetensors",
200
+ "model.layers.28.mlp.up_proj.weight": "model-00011-of-00030.safetensors",
201
+ "model.layers.28.post_attention_layernorm.weight": "model-00011-of-00030.safetensors",
202
+ "model.layers.28.self_attn.k_proj.weight": "model-00011-of-00030.safetensors",
203
+ "model.layers.28.self_attn.o_proj.weight": "model-00011-of-00030.safetensors",
204
+ "model.layers.28.self_attn.q_proj.weight": "model-00011-of-00030.safetensors",
205
+ "model.layers.28.self_attn.v_proj.weight": "model-00011-of-00030.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00012-of-00030.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00012-of-00030.safetensors",
208
+ "model.layers.29.mlp.gate_proj.weight": "model-00011-of-00030.safetensors",
209
+ "model.layers.29.mlp.up_proj.weight": "model-00012-of-00030.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00012-of-00030.safetensors",
211
+ "model.layers.29.self_attn.k_proj.weight": "model-00011-of-00030.safetensors",
212
+ "model.layers.29.self_attn.o_proj.weight": "model-00011-of-00030.safetensors",
213
+ "model.layers.29.self_attn.q_proj.weight": "model-00011-of-00030.safetensors",
214
+ "model.layers.29.self_attn.v_proj.weight": "model-00011-of-00030.safetensors",
215
+ "model.layers.3.input_layernorm.weight": "model-00002-of-00030.safetensors",
216
+ "model.layers.3.mlp.down_proj.weight": "model-00002-of-00030.safetensors",
217
+ "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00030.safetensors",
218
+ "model.layers.3.mlp.up_proj.weight": "model-00002-of-00030.safetensors",
219
+ "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00030.safetensors",
220
+ "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00030.safetensors",
221
+ "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00030.safetensors",
222
+ "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00030.safetensors",
223
+ "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00030.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00012-of-00030.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00012-of-00030.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00012-of-00030.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00012-of-00030.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00012-of-00030.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00012-of-00030.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00012-of-00030.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00012-of-00030.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00012-of-00030.safetensors",
233
+ "model.layers.31.input_layernorm.weight": "model-00012-of-00030.safetensors",
234
+ "model.layers.31.mlp.down_proj.weight": "model-00012-of-00030.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00012-of-00030.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00012-of-00030.safetensors",
237
+ "model.layers.31.post_attention_layernorm.weight": "model-00012-of-00030.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00012-of-00030.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00012-of-00030.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00012-of-00030.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00012-of-00030.safetensors",
242
+ "model.layers.32.input_layernorm.weight": "model-00013-of-00030.safetensors",
243
+ "model.layers.32.mlp.down_proj.weight": "model-00013-of-00030.safetensors",
244
+ "model.layers.32.mlp.gate_proj.weight": "model-00013-of-00030.safetensors",
245
+ "model.layers.32.mlp.up_proj.weight": "model-00013-of-00030.safetensors",
246
+ "model.layers.32.post_attention_layernorm.weight": "model-00013-of-00030.safetensors",
247
+ "model.layers.32.self_attn.k_proj.weight": "model-00012-of-00030.safetensors",
248
+ "model.layers.32.self_attn.o_proj.weight": "model-00012-of-00030.safetensors",
249
+ "model.layers.32.self_attn.q_proj.weight": "model-00012-of-00030.safetensors",
250
+ "model.layers.32.self_attn.v_proj.weight": "model-00012-of-00030.safetensors",
251
+ "model.layers.33.input_layernorm.weight": "model-00013-of-00030.safetensors",
252
+ "model.layers.33.mlp.down_proj.weight": "model-00013-of-00030.safetensors",
253
+ "model.layers.33.mlp.gate_proj.weight": "model-00013-of-00030.safetensors",
254
+ "model.layers.33.mlp.up_proj.weight": "model-00013-of-00030.safetensors",
255
+ "model.layers.33.post_attention_layernorm.weight": "model-00013-of-00030.safetensors",
256
+ "model.layers.33.self_attn.k_proj.weight": "model-00013-of-00030.safetensors",
257
+ "model.layers.33.self_attn.o_proj.weight": "model-00013-of-00030.safetensors",
258
+ "model.layers.33.self_attn.q_proj.weight": "model-00013-of-00030.safetensors",
259
+ "model.layers.33.self_attn.v_proj.weight": "model-00013-of-00030.safetensors",
260
+ "model.layers.34.input_layernorm.weight": "model-00013-of-00030.safetensors",
261
+ "model.layers.34.mlp.down_proj.weight": "model-00013-of-00030.safetensors",
262
+ "model.layers.34.mlp.gate_proj.weight": "model-00013-of-00030.safetensors",
263
+ "model.layers.34.mlp.up_proj.weight": "model-00013-of-00030.safetensors",
264
+ "model.layers.34.post_attention_layernorm.weight": "model-00013-of-00030.safetensors",
265
+ "model.layers.34.self_attn.k_proj.weight": "model-00013-of-00030.safetensors",
266
+ "model.layers.34.self_attn.o_proj.weight": "model-00013-of-00030.safetensors",
267
+ "model.layers.34.self_attn.q_proj.weight": "model-00013-of-00030.safetensors",
268
+ "model.layers.34.self_attn.v_proj.weight": "model-00013-of-00030.safetensors",
269
+ "model.layers.35.input_layernorm.weight": "model-00014-of-00030.safetensors",
270
+ "model.layers.35.mlp.down_proj.weight": "model-00014-of-00030.safetensors",
271
+ "model.layers.35.mlp.gate_proj.weight": "model-00014-of-00030.safetensors",
272
+ "model.layers.35.mlp.up_proj.weight": "model-00014-of-00030.safetensors",
273
+ "model.layers.35.post_attention_layernorm.weight": "model-00014-of-00030.safetensors",
274
+ "model.layers.35.self_attn.k_proj.weight": "model-00013-of-00030.safetensors",
275
+ "model.layers.35.self_attn.o_proj.weight": "model-00014-of-00030.safetensors",
276
+ "model.layers.35.self_attn.q_proj.weight": "model-00013-of-00030.safetensors",
277
+ "model.layers.35.self_attn.v_proj.weight": "model-00013-of-00030.safetensors",
278
+ "model.layers.36.input_layernorm.weight": "model-00014-of-00030.safetensors",
279
+ "model.layers.36.mlp.down_proj.weight": "model-00014-of-00030.safetensors",
280
+ "model.layers.36.mlp.gate_proj.weight": "model-00014-of-00030.safetensors",
281
+ "model.layers.36.mlp.up_proj.weight": "model-00014-of-00030.safetensors",
282
+ "model.layers.36.post_attention_layernorm.weight": "model-00014-of-00030.safetensors",
283
+ "model.layers.36.self_attn.k_proj.weight": "model-00014-of-00030.safetensors",
284
+ "model.layers.36.self_attn.o_proj.weight": "model-00014-of-00030.safetensors",
285
+ "model.layers.36.self_attn.q_proj.weight": "model-00014-of-00030.safetensors",
286
+ "model.layers.36.self_attn.v_proj.weight": "model-00014-of-00030.safetensors",
287
+ "model.layers.37.input_layernorm.weight": "model-00014-of-00030.safetensors",
288
+ "model.layers.37.mlp.down_proj.weight": "model-00014-of-00030.safetensors",
289
+ "model.layers.37.mlp.gate_proj.weight": "model-00014-of-00030.safetensors",
290
+ "model.layers.37.mlp.up_proj.weight": "model-00014-of-00030.safetensors",
291
+ "model.layers.37.post_attention_layernorm.weight": "model-00014-of-00030.safetensors",
292
+ "model.layers.37.self_attn.k_proj.weight": "model-00014-of-00030.safetensors",
293
+ "model.layers.37.self_attn.o_proj.weight": "model-00014-of-00030.safetensors",
294
+ "model.layers.37.self_attn.q_proj.weight": "model-00014-of-00030.safetensors",
295
+ "model.layers.37.self_attn.v_proj.weight": "model-00014-of-00030.safetensors",
296
+ "model.layers.38.input_layernorm.weight": "model-00015-of-00030.safetensors",
297
+ "model.layers.38.mlp.down_proj.weight": "model-00015-of-00030.safetensors",
298
+ "model.layers.38.mlp.gate_proj.weight": "model-00015-of-00030.safetensors",
299
+ "model.layers.38.mlp.up_proj.weight": "model-00015-of-00030.safetensors",
300
+ "model.layers.38.post_attention_layernorm.weight": "model-00015-of-00030.safetensors",
301
+ "model.layers.38.self_attn.k_proj.weight": "model-00015-of-00030.safetensors",
302
+ "model.layers.38.self_attn.o_proj.weight": "model-00015-of-00030.safetensors",
303
+ "model.layers.38.self_attn.q_proj.weight": "model-00015-of-00030.safetensors",
304
+ "model.layers.38.self_attn.v_proj.weight": "model-00015-of-00030.safetensors",
305
+ "model.layers.39.input_layernorm.weight": "model-00015-of-00030.safetensors",
306
+ "model.layers.39.mlp.down_proj.weight": "model-00015-of-00030.safetensors",
307
+ "model.layers.39.mlp.gate_proj.weight": "model-00015-of-00030.safetensors",
308
+ "model.layers.39.mlp.up_proj.weight": "model-00015-of-00030.safetensors",
309
+ "model.layers.39.post_attention_layernorm.weight": "model-00015-of-00030.safetensors",
310
+ "model.layers.39.self_attn.k_proj.weight": "model-00015-of-00030.safetensors",
311
+ "model.layers.39.self_attn.o_proj.weight": "model-00015-of-00030.safetensors",
312
+ "model.layers.39.self_attn.q_proj.weight": "model-00015-of-00030.safetensors",
313
+ "model.layers.39.self_attn.v_proj.weight": "model-00015-of-00030.safetensors",
314
+ "model.layers.4.input_layernorm.weight": "model-00003-of-00030.safetensors",
315
+ "model.layers.4.mlp.down_proj.weight": "model-00003-of-00030.safetensors",
316
+ "model.layers.4.mlp.gate_proj.weight": "model-00003-of-00030.safetensors",
317
+ "model.layers.4.mlp.up_proj.weight": "model-00003-of-00030.safetensors",
318
+ "model.layers.4.post_attention_layernorm.weight": "model-00003-of-00030.safetensors",
319
+ "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00030.safetensors",
320
+ "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00030.safetensors",
321
+ "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00030.safetensors",
322
+ "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00030.safetensors",
323
+ "model.layers.40.input_layernorm.weight": "model-00016-of-00030.safetensors",
324
+ "model.layers.40.mlp.down_proj.weight": "model-00016-of-00030.safetensors",
325
+ "model.layers.40.mlp.gate_proj.weight": "model-00015-of-00030.safetensors",
326
+ "model.layers.40.mlp.up_proj.weight": "model-00015-of-00030.safetensors",
327
+ "model.layers.40.post_attention_layernorm.weight": "model-00016-of-00030.safetensors",
328
+ "model.layers.40.self_attn.k_proj.weight": "model-00015-of-00030.safetensors",
329
+ "model.layers.40.self_attn.o_proj.weight": "model-00015-of-00030.safetensors",
330
+ "model.layers.40.self_attn.q_proj.weight": "model-00015-of-00030.safetensors",
331
+ "model.layers.40.self_attn.v_proj.weight": "model-00015-of-00030.safetensors",
332
+ "model.layers.41.input_layernorm.weight": "model-00016-of-00030.safetensors",
333
+ "model.layers.41.mlp.down_proj.weight": "model-00016-of-00030.safetensors",
334
+ "model.layers.41.mlp.gate_proj.weight": "model-00016-of-00030.safetensors",
335
+ "model.layers.41.mlp.up_proj.weight": "model-00016-of-00030.safetensors",
336
+ "model.layers.41.post_attention_layernorm.weight": "model-00016-of-00030.safetensors",
337
+ "model.layers.41.self_attn.k_proj.weight": "model-00016-of-00030.safetensors",
338
+ "model.layers.41.self_attn.o_proj.weight": "model-00016-of-00030.safetensors",
339
+ "model.layers.41.self_attn.q_proj.weight": "model-00016-of-00030.safetensors",
340
+ "model.layers.41.self_attn.v_proj.weight": "model-00016-of-00030.safetensors",
341
+ "model.layers.42.input_layernorm.weight": "model-00016-of-00030.safetensors",
342
+ "model.layers.42.mlp.down_proj.weight": "model-00016-of-00030.safetensors",
343
+ "model.layers.42.mlp.gate_proj.weight": "model-00016-of-00030.safetensors",
344
+ "model.layers.42.mlp.up_proj.weight": "model-00016-of-00030.safetensors",
345
+ "model.layers.42.post_attention_layernorm.weight": "model-00016-of-00030.safetensors",
346
+ "model.layers.42.self_attn.k_proj.weight": "model-00016-of-00030.safetensors",
347
+ "model.layers.42.self_attn.o_proj.weight": "model-00016-of-00030.safetensors",
348
+ "model.layers.42.self_attn.q_proj.weight": "model-00016-of-00030.safetensors",
349
+ "model.layers.42.self_attn.v_proj.weight": "model-00016-of-00030.safetensors",
350
+ "model.layers.43.input_layernorm.weight": "model-00017-of-00030.safetensors",
351
+ "model.layers.43.mlp.down_proj.weight": "model-00017-of-00030.safetensors",
352
+ "model.layers.43.mlp.gate_proj.weight": "model-00016-of-00030.safetensors",
353
+ "model.layers.43.mlp.up_proj.weight": "model-00017-of-00030.safetensors",
354
+ "model.layers.43.post_attention_layernorm.weight": "model-00017-of-00030.safetensors",
355
+ "model.layers.43.self_attn.k_proj.weight": "model-00016-of-00030.safetensors",
356
+ "model.layers.43.self_attn.o_proj.weight": "model-00016-of-00030.safetensors",
357
+ "model.layers.43.self_attn.q_proj.weight": "model-00016-of-00030.safetensors",
358
+ "model.layers.43.self_attn.v_proj.weight": "model-00016-of-00030.safetensors",
359
+ "model.layers.44.input_layernorm.weight": "model-00017-of-00030.safetensors",
360
+ "model.layers.44.mlp.down_proj.weight": "model-00017-of-00030.safetensors",
361
+ "model.layers.44.mlp.gate_proj.weight": "model-00017-of-00030.safetensors",
362
+ "model.layers.44.mlp.up_proj.weight": "model-00017-of-00030.safetensors",
363
+ "model.layers.44.post_attention_layernorm.weight": "model-00017-of-00030.safetensors",
364
+ "model.layers.44.self_attn.k_proj.weight": "model-00017-of-00030.safetensors",
365
+ "model.layers.44.self_attn.o_proj.weight": "model-00017-of-00030.safetensors",
366
+ "model.layers.44.self_attn.q_proj.weight": "model-00017-of-00030.safetensors",
367
+ "model.layers.44.self_attn.v_proj.weight": "model-00017-of-00030.safetensors",
368
+ "model.layers.45.input_layernorm.weight": "model-00017-of-00030.safetensors",
369
+ "model.layers.45.mlp.down_proj.weight": "model-00017-of-00030.safetensors",
370
+ "model.layers.45.mlp.gate_proj.weight": "model-00017-of-00030.safetensors",
371
+ "model.layers.45.mlp.up_proj.weight": "model-00017-of-00030.safetensors",
372
+ "model.layers.45.post_attention_layernorm.weight": "model-00017-of-00030.safetensors",
373
+ "model.layers.45.self_attn.k_proj.weight": "model-00017-of-00030.safetensors",
374
+ "model.layers.45.self_attn.o_proj.weight": "model-00017-of-00030.safetensors",
375
+ "model.layers.45.self_attn.q_proj.weight": "model-00017-of-00030.safetensors",
376
+ "model.layers.45.self_attn.v_proj.weight": "model-00017-of-00030.safetensors",
377
+ "model.layers.46.input_layernorm.weight": "model-00018-of-00030.safetensors",
378
+ "model.layers.46.mlp.down_proj.weight": "model-00018-of-00030.safetensors",
379
+ "model.layers.46.mlp.gate_proj.weight": "model-00018-of-00030.safetensors",
380
+ "model.layers.46.mlp.up_proj.weight": "model-00018-of-00030.safetensors",
381
+ "model.layers.46.post_attention_layernorm.weight": "model-00018-of-00030.safetensors",
382
+ "model.layers.46.self_attn.k_proj.weight": "model-00017-of-00030.safetensors",
383
+ "model.layers.46.self_attn.o_proj.weight": "model-00017-of-00030.safetensors",
384
+ "model.layers.46.self_attn.q_proj.weight": "model-00017-of-00030.safetensors",
385
+ "model.layers.46.self_attn.v_proj.weight": "model-00017-of-00030.safetensors",
386
+ "model.layers.47.input_layernorm.weight": "model-00018-of-00030.safetensors",
387
+ "model.layers.47.mlp.down_proj.weight": "model-00018-of-00030.safetensors",
388
+ "model.layers.47.mlp.gate_proj.weight": "model-00018-of-00030.safetensors",
389
+ "model.layers.47.mlp.up_proj.weight": "model-00018-of-00030.safetensors",
390
+ "model.layers.47.post_attention_layernorm.weight": "model-00018-of-00030.safetensors",
391
+ "model.layers.47.self_attn.k_proj.weight": "model-00018-of-00030.safetensors",
392
+ "model.layers.47.self_attn.o_proj.weight": "model-00018-of-00030.safetensors",
393
+ "model.layers.47.self_attn.q_proj.weight": "model-00018-of-00030.safetensors",
394
+ "model.layers.47.self_attn.v_proj.weight": "model-00018-of-00030.safetensors",
395
+ "model.layers.48.input_layernorm.weight": "model-00018-of-00030.safetensors",
396
+ "model.layers.48.mlp.down_proj.weight": "model-00018-of-00030.safetensors",
397
+ "model.layers.48.mlp.gate_proj.weight": "model-00018-of-00030.safetensors",
398
+ "model.layers.48.mlp.up_proj.weight": "model-00018-of-00030.safetensors",
399
+ "model.layers.48.post_attention_layernorm.weight": "model-00018-of-00030.safetensors",
400
+ "model.layers.48.self_attn.k_proj.weight": "model-00018-of-00030.safetensors",
401
+ "model.layers.48.self_attn.o_proj.weight": "model-00018-of-00030.safetensors",
402
+ "model.layers.48.self_attn.q_proj.weight": "model-00018-of-00030.safetensors",
403
+ "model.layers.48.self_attn.v_proj.weight": "model-00018-of-00030.safetensors",
404
+ "model.layers.49.input_layernorm.weight": "model-00019-of-00030.safetensors",
405
+ "model.layers.49.mlp.down_proj.weight": "model-00019-of-00030.safetensors",
406
+ "model.layers.49.mlp.gate_proj.weight": "model-00019-of-00030.safetensors",
407
+ "model.layers.49.mlp.up_proj.weight": "model-00019-of-00030.safetensors",
408
+ "model.layers.49.post_attention_layernorm.weight": "model-00019-of-00030.safetensors",
409
+ "model.layers.49.self_attn.k_proj.weight": "model-00018-of-00030.safetensors",
410
+ "model.layers.49.self_attn.o_proj.weight": "model-00019-of-00030.safetensors",
411
+ "model.layers.49.self_attn.q_proj.weight": "model-00018-of-00030.safetensors",
412
+ "model.layers.49.self_attn.v_proj.weight": "model-00018-of-00030.safetensors",
413
+ "model.layers.5.input_layernorm.weight": "model-00003-of-00030.safetensors",
414
+ "model.layers.5.mlp.down_proj.weight": "model-00003-of-00030.safetensors",
415
+ "model.layers.5.mlp.gate_proj.weight": "model-00003-of-00030.safetensors",
416
+ "model.layers.5.mlp.up_proj.weight": "model-00003-of-00030.safetensors",
417
+ "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00030.safetensors",
418
+ "model.layers.5.self_attn.k_proj.weight": "model-00003-of-00030.safetensors",
419
+ "model.layers.5.self_attn.o_proj.weight": "model-00003-of-00030.safetensors",
420
+ "model.layers.5.self_attn.q_proj.weight": "model-00003-of-00030.safetensors",
421
+ "model.layers.5.self_attn.v_proj.weight": "model-00003-of-00030.safetensors",
422
+ "model.layers.50.input_layernorm.weight": "model-00019-of-00030.safetensors",
423
+ "model.layers.50.mlp.down_proj.weight": "model-00019-of-00030.safetensors",
424
+ "model.layers.50.mlp.gate_proj.weight": "model-00019-of-00030.safetensors",
425
+ "model.layers.50.mlp.up_proj.weight": "model-00019-of-00030.safetensors",
426
+ "model.layers.50.post_attention_layernorm.weight": "model-00019-of-00030.safetensors",
427
+ "model.layers.50.self_attn.k_proj.weight": "model-00019-of-00030.safetensors",
428
+ "model.layers.50.self_attn.o_proj.weight": "model-00019-of-00030.safetensors",
429
+ "model.layers.50.self_attn.q_proj.weight": "model-00019-of-00030.safetensors",
430
+ "model.layers.50.self_attn.v_proj.weight": "model-00019-of-00030.safetensors",
431
+ "model.layers.51.input_layernorm.weight": "model-00019-of-00030.safetensors",
432
+ "model.layers.51.mlp.down_proj.weight": "model-00019-of-00030.safetensors",
433
+ "model.layers.51.mlp.gate_proj.weight": "model-00019-of-00030.safetensors",
434
+ "model.layers.51.mlp.up_proj.weight": "model-00019-of-00030.safetensors",
435
+ "model.layers.51.post_attention_layernorm.weight": "model-00019-of-00030.safetensors",
436
+ "model.layers.51.self_attn.k_proj.weight": "model-00019-of-00030.safetensors",
437
+ "model.layers.51.self_attn.o_proj.weight": "model-00019-of-00030.safetensors",
438
+ "model.layers.51.self_attn.q_proj.weight": "model-00019-of-00030.safetensors",
439
+ "model.layers.51.self_attn.v_proj.weight": "model-00019-of-00030.safetensors",
440
+ "model.layers.52.input_layernorm.weight": "model-00020-of-00030.safetensors",
441
+ "model.layers.52.mlp.down_proj.weight": "model-00020-of-00030.safetensors",
442
+ "model.layers.52.mlp.gate_proj.weight": "model-00020-of-00030.safetensors",
443
+ "model.layers.52.mlp.up_proj.weight": "model-00020-of-00030.safetensors",
444
+ "model.layers.52.post_attention_layernorm.weight": "model-00020-of-00030.safetensors",
445
+ "model.layers.52.self_attn.k_proj.weight": "model-00020-of-00030.safetensors",
446
+ "model.layers.52.self_attn.o_proj.weight": "model-00020-of-00030.safetensors",
447
+ "model.layers.52.self_attn.q_proj.weight": "model-00020-of-00030.safetensors",
448
+ "model.layers.52.self_attn.v_proj.weight": "model-00020-of-00030.safetensors",
449
+ "model.layers.53.input_layernorm.weight": "model-00020-of-00030.safetensors",
450
+ "model.layers.53.mlp.down_proj.weight": "model-00020-of-00030.safetensors",
451
+ "model.layers.53.mlp.gate_proj.weight": "model-00020-of-00030.safetensors",
452
+ "model.layers.53.mlp.up_proj.weight": "model-00020-of-00030.safetensors",
453
+ "model.layers.53.post_attention_layernorm.weight": "model-00020-of-00030.safetensors",
454
+ "model.layers.53.self_attn.k_proj.weight": "model-00020-of-00030.safetensors",
455
+ "model.layers.53.self_attn.o_proj.weight": "model-00020-of-00030.safetensors",
456
+ "model.layers.53.self_attn.q_proj.weight": "model-00020-of-00030.safetensors",
457
+ "model.layers.53.self_attn.v_proj.weight": "model-00020-of-00030.safetensors",
458
+ "model.layers.54.input_layernorm.weight": "model-00021-of-00030.safetensors",
459
+ "model.layers.54.mlp.down_proj.weight": "model-00021-of-00030.safetensors",
460
+ "model.layers.54.mlp.gate_proj.weight": "model-00020-of-00030.safetensors",
461
+ "model.layers.54.mlp.up_proj.weight": "model-00020-of-00030.safetensors",
462
+ "model.layers.54.post_attention_layernorm.weight": "model-00021-of-00030.safetensors",
463
+ "model.layers.54.self_attn.k_proj.weight": "model-00020-of-00030.safetensors",
464
+ "model.layers.54.self_attn.o_proj.weight": "model-00020-of-00030.safetensors",
465
+ "model.layers.54.self_attn.q_proj.weight": "model-00020-of-00030.safetensors",
466
+ "model.layers.54.self_attn.v_proj.weight": "model-00020-of-00030.safetensors",
467
+ "model.layers.55.input_layernorm.weight": "model-00021-of-00030.safetensors",
468
+ "model.layers.55.mlp.down_proj.weight": "model-00021-of-00030.safetensors",
469
+ "model.layers.55.mlp.gate_proj.weight": "model-00021-of-00030.safetensors",
470
+ "model.layers.55.mlp.up_proj.weight": "model-00021-of-00030.safetensors",
471
+ "model.layers.55.post_attention_layernorm.weight": "model-00021-of-00030.safetensors",
472
+ "model.layers.55.self_attn.k_proj.weight": "model-00021-of-00030.safetensors",
473
+ "model.layers.55.self_attn.o_proj.weight": "model-00021-of-00030.safetensors",
474
+ "model.layers.55.self_attn.q_proj.weight": "model-00021-of-00030.safetensors",
475
+ "model.layers.55.self_attn.v_proj.weight": "model-00021-of-00030.safetensors",
476
+ "model.layers.56.input_layernorm.weight": "model-00021-of-00030.safetensors",
477
+ "model.layers.56.mlp.down_proj.weight": "model-00021-of-00030.safetensors",
478
+ "model.layers.56.mlp.gate_proj.weight": "model-00021-of-00030.safetensors",
479
+ "model.layers.56.mlp.up_proj.weight": "model-00021-of-00030.safetensors",
480
+ "model.layers.56.post_attention_layernorm.weight": "model-00021-of-00030.safetensors",
481
+ "model.layers.56.self_attn.k_proj.weight": "model-00021-of-00030.safetensors",
482
+ "model.layers.56.self_attn.o_proj.weight": "model-00021-of-00030.safetensors",
483
+ "model.layers.56.self_attn.q_proj.weight": "model-00021-of-00030.safetensors",
484
+ "model.layers.56.self_attn.v_proj.weight": "model-00021-of-00030.safetensors",
485
+ "model.layers.57.input_layernorm.weight": "model-00022-of-00030.safetensors",
486
+ "model.layers.57.mlp.down_proj.weight": "model-00022-of-00030.safetensors",
487
+ "model.layers.57.mlp.gate_proj.weight": "model-00021-of-00030.safetensors",
488
+ "model.layers.57.mlp.up_proj.weight": "model-00022-of-00030.safetensors",
489
+ "model.layers.57.post_attention_layernorm.weight": "model-00022-of-00030.safetensors",
490
+ "model.layers.57.self_attn.k_proj.weight": "model-00021-of-00030.safetensors",
491
+ "model.layers.57.self_attn.o_proj.weight": "model-00021-of-00030.safetensors",
492
+ "model.layers.57.self_attn.q_proj.weight": "model-00021-of-00030.safetensors",
493
+ "model.layers.57.self_attn.v_proj.weight": "model-00021-of-00030.safetensors",
494
+ "model.layers.58.input_layernorm.weight": "model-00022-of-00030.safetensors",
495
+ "model.layers.58.mlp.down_proj.weight": "model-00022-of-00030.safetensors",
496
+ "model.layers.58.mlp.gate_proj.weight": "model-00022-of-00030.safetensors",
497
+ "model.layers.58.mlp.up_proj.weight": "model-00022-of-00030.safetensors",
498
+ "model.layers.58.post_attention_layernorm.weight": "model-00022-of-00030.safetensors",
499
+ "model.layers.58.self_attn.k_proj.weight": "model-00022-of-00030.safetensors",
500
+ "model.layers.58.self_attn.o_proj.weight": "model-00022-of-00030.safetensors",
501
+ "model.layers.58.self_attn.q_proj.weight": "model-00022-of-00030.safetensors",
502
+ "model.layers.58.self_attn.v_proj.weight": "model-00022-of-00030.safetensors",
503
+ "model.layers.59.input_layernorm.weight": "model-00022-of-00030.safetensors",
504
+ "model.layers.59.mlp.down_proj.weight": "model-00022-of-00030.safetensors",
505
+ "model.layers.59.mlp.gate_proj.weight": "model-00022-of-00030.safetensors",
506
+ "model.layers.59.mlp.up_proj.weight": "model-00022-of-00030.safetensors",
507
+ "model.layers.59.post_attention_layernorm.weight": "model-00022-of-00030.safetensors",
508
+ "model.layers.59.self_attn.k_proj.weight": "model-00022-of-00030.safetensors",
509
+ "model.layers.59.self_attn.o_proj.weight": "model-00022-of-00030.safetensors",
510
+ "model.layers.59.self_attn.q_proj.weight": "model-00022-of-00030.safetensors",
511
+ "model.layers.59.self_attn.v_proj.weight": "model-00022-of-00030.safetensors",
512
+ "model.layers.6.input_layernorm.weight": "model-00003-of-00030.safetensors",
513
+ "model.layers.6.mlp.down_proj.weight": "model-00003-of-00030.safetensors",
514
+ "model.layers.6.mlp.gate_proj.weight": "model-00003-of-00030.safetensors",
515
+ "model.layers.6.mlp.up_proj.weight": "model-00003-of-00030.safetensors",
516
+ "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00030.safetensors",
517
+ "model.layers.6.self_attn.k_proj.weight": "model-00003-of-00030.safetensors",
518
+ "model.layers.6.self_attn.o_proj.weight": "model-00003-of-00030.safetensors",
519
+ "model.layers.6.self_attn.q_proj.weight": "model-00003-of-00030.safetensors",
520
+ "model.layers.6.self_attn.v_proj.weight": "model-00003-of-00030.safetensors",
521
+ "model.layers.60.input_layernorm.weight": "model-00023-of-00030.safetensors",
522
+ "model.layers.60.mlp.down_proj.weight": "model-00023-of-00030.safetensors",
523
+ "model.layers.60.mlp.gate_proj.weight": "model-00023-of-00030.safetensors",
524
+ "model.layers.60.mlp.up_proj.weight": "model-00023-of-00030.safetensors",
525
+ "model.layers.60.post_attention_layernorm.weight": "model-00023-of-00030.safetensors",
526
+ "model.layers.60.self_attn.k_proj.weight": "model-00022-of-00030.safetensors",
527
+ "model.layers.60.self_attn.o_proj.weight": "model-00022-of-00030.safetensors",
528
+ "model.layers.60.self_attn.q_proj.weight": "model-00022-of-00030.safetensors",
529
+ "model.layers.60.self_attn.v_proj.weight": "model-00022-of-00030.safetensors",
530
+ "model.layers.61.input_layernorm.weight": "model-00023-of-00030.safetensors",
531
+ "model.layers.61.mlp.down_proj.weight": "model-00023-of-00030.safetensors",
532
+ "model.layers.61.mlp.gate_proj.weight": "model-00023-of-00030.safetensors",
533
+ "model.layers.61.mlp.up_proj.weight": "model-00023-of-00030.safetensors",
534
+ "model.layers.61.post_attention_layernorm.weight": "model-00023-of-00030.safetensors",
535
+ "model.layers.61.self_attn.k_proj.weight": "model-00023-of-00030.safetensors",
536
+ "model.layers.61.self_attn.o_proj.weight": "model-00023-of-00030.safetensors",
537
+ "model.layers.61.self_attn.q_proj.weight": "model-00023-of-00030.safetensors",
538
+ "model.layers.61.self_attn.v_proj.weight": "model-00023-of-00030.safetensors",
539
+ "model.layers.62.input_layernorm.weight": "model-00023-of-00030.safetensors",
540
+ "model.layers.62.mlp.down_proj.weight": "model-00023-of-00030.safetensors",
541
+ "model.layers.62.mlp.gate_proj.weight": "model-00023-of-00030.safetensors",
542
+ "model.layers.62.mlp.up_proj.weight": "model-00023-of-00030.safetensors",
543
+ "model.layers.62.post_attention_layernorm.weight": "model-00023-of-00030.safetensors",
544
+ "model.layers.62.self_attn.k_proj.weight": "model-00023-of-00030.safetensors",
545
+ "model.layers.62.self_attn.o_proj.weight": "model-00023-of-00030.safetensors",
546
+ "model.layers.62.self_attn.q_proj.weight": "model-00023-of-00030.safetensors",
547
+ "model.layers.62.self_attn.v_proj.weight": "model-00023-of-00030.safetensors",
548
+ "model.layers.63.input_layernorm.weight": "model-00024-of-00030.safetensors",
549
+ "model.layers.63.mlp.down_proj.weight": "model-00024-of-00030.safetensors",
550
+ "model.layers.63.mlp.gate_proj.weight": "model-00024-of-00030.safetensors",
551
+ "model.layers.63.mlp.up_proj.weight": "model-00024-of-00030.safetensors",
552
+ "model.layers.63.post_attention_layernorm.weight": "model-00024-of-00030.safetensors",
553
+ "model.layers.63.self_attn.k_proj.weight": "model-00023-of-00030.safetensors",
554
+ "model.layers.63.self_attn.o_proj.weight": "model-00024-of-00030.safetensors",
555
+ "model.layers.63.self_attn.q_proj.weight": "model-00023-of-00030.safetensors",
556
+ "model.layers.63.self_attn.v_proj.weight": "model-00023-of-00030.safetensors",
557
+ "model.layers.64.input_layernorm.weight": "model-00024-of-00030.safetensors",
558
+ "model.layers.64.mlp.down_proj.weight": "model-00024-of-00030.safetensors",
559
+ "model.layers.64.mlp.gate_proj.weight": "model-00024-of-00030.safetensors",
560
+ "model.layers.64.mlp.up_proj.weight": "model-00024-of-00030.safetensors",
561
+ "model.layers.64.post_attention_layernorm.weight": "model-00024-of-00030.safetensors",
562
+ "model.layers.64.self_attn.k_proj.weight": "model-00024-of-00030.safetensors",
563
+ "model.layers.64.self_attn.o_proj.weight": "model-00024-of-00030.safetensors",
564
+ "model.layers.64.self_attn.q_proj.weight": "model-00024-of-00030.safetensors",
565
+ "model.layers.64.self_attn.v_proj.weight": "model-00024-of-00030.safetensors",
566
+ "model.layers.65.input_layernorm.weight": "model-00024-of-00030.safetensors",
567
+ "model.layers.65.mlp.down_proj.weight": "model-00024-of-00030.safetensors",
568
+ "model.layers.65.mlp.gate_proj.weight": "model-00024-of-00030.safetensors",
569
+ "model.layers.65.mlp.up_proj.weight": "model-00024-of-00030.safetensors",
570
+ "model.layers.65.post_attention_layernorm.weight": "model-00024-of-00030.safetensors",
571
+ "model.layers.65.self_attn.k_proj.weight": "model-00024-of-00030.safetensors",
572
+ "model.layers.65.self_attn.o_proj.weight": "model-00024-of-00030.safetensors",
573
+ "model.layers.65.self_attn.q_proj.weight": "model-00024-of-00030.safetensors",
574
+ "model.layers.65.self_attn.v_proj.weight": "model-00024-of-00030.safetensors",
575
+ "model.layers.66.input_layernorm.weight": "model-00025-of-00030.safetensors",
576
+ "model.layers.66.mlp.down_proj.weight": "model-00025-of-00030.safetensors",
577
+ "model.layers.66.mlp.gate_proj.weight": "model-00025-of-00030.safetensors",
578
+ "model.layers.66.mlp.up_proj.weight": "model-00025-of-00030.safetensors",
579
+ "model.layers.66.post_attention_layernorm.weight": "model-00025-of-00030.safetensors",
580
+ "model.layers.66.self_attn.k_proj.weight": "model-00025-of-00030.safetensors",
581
+ "model.layers.66.self_attn.o_proj.weight": "model-00025-of-00030.safetensors",
582
+ "model.layers.66.self_attn.q_proj.weight": "model-00025-of-00030.safetensors",
583
+ "model.layers.66.self_attn.v_proj.weight": "model-00025-of-00030.safetensors",
584
+ "model.layers.67.input_layernorm.weight": "model-00025-of-00030.safetensors",
585
+ "model.layers.67.mlp.down_proj.weight": "model-00025-of-00030.safetensors",
586
+ "model.layers.67.mlp.gate_proj.weight": "model-00025-of-00030.safetensors",
587
+ "model.layers.67.mlp.up_proj.weight": "model-00025-of-00030.safetensors",
588
+ "model.layers.67.post_attention_layernorm.weight": "model-00025-of-00030.safetensors",
589
+ "model.layers.67.self_attn.k_proj.weight": "model-00025-of-00030.safetensors",
590
+ "model.layers.67.self_attn.o_proj.weight": "model-00025-of-00030.safetensors",
591
+ "model.layers.67.self_attn.q_proj.weight": "model-00025-of-00030.safetensors",
592
+ "model.layers.67.self_attn.v_proj.weight": "model-00025-of-00030.safetensors",
593
+ "model.layers.68.input_layernorm.weight": "model-00026-of-00030.safetensors",
594
+ "model.layers.68.mlp.down_proj.weight": "model-00026-of-00030.safetensors",
595
+ "model.layers.68.mlp.gate_proj.weight": "model-00025-of-00030.safetensors",
596
+ "model.layers.68.mlp.up_proj.weight": "model-00025-of-00030.safetensors",
597
+ "model.layers.68.post_attention_layernorm.weight": "model-00026-of-00030.safetensors",
598
+ "model.layers.68.self_attn.k_proj.weight": "model-00025-of-00030.safetensors",
599
+ "model.layers.68.self_attn.o_proj.weight": "model-00025-of-00030.safetensors",
600
+ "model.layers.68.self_attn.q_proj.weight": "model-00025-of-00030.safetensors",
601
+ "model.layers.68.self_attn.v_proj.weight": "model-00025-of-00030.safetensors",
602
+ "model.layers.69.input_layernorm.weight": "model-00026-of-00030.safetensors",
603
+ "model.layers.69.mlp.down_proj.weight": "model-00026-of-00030.safetensors",
604
+ "model.layers.69.mlp.gate_proj.weight": "model-00026-of-00030.safetensors",
605
+ "model.layers.69.mlp.up_proj.weight": "model-00026-of-00030.safetensors",
606
+ "model.layers.69.post_attention_layernorm.weight": "model-00026-of-00030.safetensors",
607
+ "model.layers.69.self_attn.k_proj.weight": "model-00026-of-00030.safetensors",
608
+ "model.layers.69.self_attn.o_proj.weight": "model-00026-of-00030.safetensors",
609
+ "model.layers.69.self_attn.q_proj.weight": "model-00026-of-00030.safetensors",
610
+ "model.layers.69.self_attn.v_proj.weight": "model-00026-of-00030.safetensors",
611
+ "model.layers.7.input_layernorm.weight": "model-00004-of-00030.safetensors",
612
+ "model.layers.7.mlp.down_proj.weight": "model-00004-of-00030.safetensors",
613
+ "model.layers.7.mlp.gate_proj.weight": "model-00004-of-00030.safetensors",
614
+ "model.layers.7.mlp.up_proj.weight": "model-00004-of-00030.safetensors",
615
+ "model.layers.7.post_attention_layernorm.weight": "model-00004-of-00030.safetensors",
616
+ "model.layers.7.self_attn.k_proj.weight": "model-00003-of-00030.safetensors",
617
+ "model.layers.7.self_attn.o_proj.weight": "model-00004-of-00030.safetensors",
618
+ "model.layers.7.self_attn.q_proj.weight": "model-00003-of-00030.safetensors",
619
+ "model.layers.7.self_attn.v_proj.weight": "model-00003-of-00030.safetensors",
620
+ "model.layers.70.input_layernorm.weight": "model-00026-of-00030.safetensors",
621
+ "model.layers.70.mlp.down_proj.weight": "model-00026-of-00030.safetensors",
622
+ "model.layers.70.mlp.gate_proj.weight": "model-00026-of-00030.safetensors",
623
+ "model.layers.70.mlp.up_proj.weight": "model-00026-of-00030.safetensors",
624
+ "model.layers.70.post_attention_layernorm.weight": "model-00026-of-00030.safetensors",
625
+ "model.layers.70.self_attn.k_proj.weight": "model-00026-of-00030.safetensors",
626
+ "model.layers.70.self_attn.o_proj.weight": "model-00026-of-00030.safetensors",
627
+ "model.layers.70.self_attn.q_proj.weight": "model-00026-of-00030.safetensors",
628
+ "model.layers.70.self_attn.v_proj.weight": "model-00026-of-00030.safetensors",
629
+ "model.layers.71.input_layernorm.weight": "model-00027-of-00030.safetensors",
630
+ "model.layers.71.mlp.down_proj.weight": "model-00027-of-00030.safetensors",
631
+ "model.layers.71.mlp.gate_proj.weight": "model-00026-of-00030.safetensors",
632
+ "model.layers.71.mlp.up_proj.weight": "model-00027-of-00030.safetensors",
633
+ "model.layers.71.post_attention_layernorm.weight": "model-00027-of-00030.safetensors",
634
+ "model.layers.71.self_attn.k_proj.weight": "model-00026-of-00030.safetensors",
635
+ "model.layers.71.self_attn.o_proj.weight": "model-00026-of-00030.safetensors",
636
+ "model.layers.71.self_attn.q_proj.weight": "model-00026-of-00030.safetensors",
637
+ "model.layers.71.self_attn.v_proj.weight": "model-00026-of-00030.safetensors",
638
+ "model.layers.72.input_layernorm.weight": "model-00027-of-00030.safetensors",
639
+ "model.layers.72.mlp.down_proj.weight": "model-00027-of-00030.safetensors",
640
+ "model.layers.72.mlp.gate_proj.weight": "model-00027-of-00030.safetensors",
641
+ "model.layers.72.mlp.up_proj.weight": "model-00027-of-00030.safetensors",
642
+ "model.layers.72.post_attention_layernorm.weight": "model-00027-of-00030.safetensors",
643
+ "model.layers.72.self_attn.k_proj.weight": "model-00027-of-00030.safetensors",
644
+ "model.layers.72.self_attn.o_proj.weight": "model-00027-of-00030.safetensors",
645
+ "model.layers.72.self_attn.q_proj.weight": "model-00027-of-00030.safetensors",
646
+ "model.layers.72.self_attn.v_proj.weight": "model-00027-of-00030.safetensors",
647
+ "model.layers.73.input_layernorm.weight": "model-00027-of-00030.safetensors",
648
+ "model.layers.73.mlp.down_proj.weight": "model-00027-of-00030.safetensors",
649
+ "model.layers.73.mlp.gate_proj.weight": "model-00027-of-00030.safetensors",
650
+ "model.layers.73.mlp.up_proj.weight": "model-00027-of-00030.safetensors",
651
+ "model.layers.73.post_attention_layernorm.weight": "model-00027-of-00030.safetensors",
652
+ "model.layers.73.self_attn.k_proj.weight": "model-00027-of-00030.safetensors",
653
+ "model.layers.73.self_attn.o_proj.weight": "model-00027-of-00030.safetensors",
654
+ "model.layers.73.self_attn.q_proj.weight": "model-00027-of-00030.safetensors",
655
+ "model.layers.73.self_attn.v_proj.weight": "model-00027-of-00030.safetensors",
656
+ "model.layers.74.input_layernorm.weight": "model-00028-of-00030.safetensors",
657
+ "model.layers.74.mlp.down_proj.weight": "model-00028-of-00030.safetensors",
658
+ "model.layers.74.mlp.gate_proj.weight": "model-00028-of-00030.safetensors",
659
+ "model.layers.74.mlp.up_proj.weight": "model-00028-of-00030.safetensors",
660
+ "model.layers.74.post_attention_layernorm.weight": "model-00028-of-00030.safetensors",
661
+ "model.layers.74.self_attn.k_proj.weight": "model-00027-of-00030.safetensors",
662
+ "model.layers.74.self_attn.o_proj.weight": "model-00027-of-00030.safetensors",
663
+ "model.layers.74.self_attn.q_proj.weight": "model-00027-of-00030.safetensors",
664
+ "model.layers.74.self_attn.v_proj.weight": "model-00027-of-00030.safetensors",
665
+ "model.layers.75.input_layernorm.weight": "model-00028-of-00030.safetensors",
666
+ "model.layers.75.mlp.down_proj.weight": "model-00028-of-00030.safetensors",
667
+ "model.layers.75.mlp.gate_proj.weight": "model-00028-of-00030.safetensors",
668
+ "model.layers.75.mlp.up_proj.weight": "model-00028-of-00030.safetensors",
669
+ "model.layers.75.post_attention_layernorm.weight": "model-00028-of-00030.safetensors",
670
+ "model.layers.75.self_attn.k_proj.weight": "model-00028-of-00030.safetensors",
671
+ "model.layers.75.self_attn.o_proj.weight": "model-00028-of-00030.safetensors",
672
+ "model.layers.75.self_attn.q_proj.weight": "model-00028-of-00030.safetensors",
673
+ "model.layers.75.self_attn.v_proj.weight": "model-00028-of-00030.safetensors",
674
+ "model.layers.76.input_layernorm.weight": "model-00028-of-00030.safetensors",
675
+ "model.layers.76.mlp.down_proj.weight": "model-00028-of-00030.safetensors",
676
+ "model.layers.76.mlp.gate_proj.weight": "model-00028-of-00030.safetensors",
677
+ "model.layers.76.mlp.up_proj.weight": "model-00028-of-00030.safetensors",
678
+ "model.layers.76.post_attention_layernorm.weight": "model-00028-of-00030.safetensors",
679
+ "model.layers.76.self_attn.k_proj.weight": "model-00028-of-00030.safetensors",
680
+ "model.layers.76.self_attn.o_proj.weight": "model-00028-of-00030.safetensors",
681
+ "model.layers.76.self_attn.q_proj.weight": "model-00028-of-00030.safetensors",
682
+ "model.layers.76.self_attn.v_proj.weight": "model-00028-of-00030.safetensors",
683
+ "model.layers.77.input_layernorm.weight": "model-00029-of-00030.safetensors",
684
+ "model.layers.77.mlp.down_proj.weight": "model-00029-of-00030.safetensors",
685
+ "model.layers.77.mlp.gate_proj.weight": "model-00029-of-00030.safetensors",
686
+ "model.layers.77.mlp.up_proj.weight": "model-00029-of-00030.safetensors",
687
+ "model.layers.77.post_attention_layernorm.weight": "model-00029-of-00030.safetensors",
688
+ "model.layers.77.self_attn.k_proj.weight": "model-00028-of-00030.safetensors",
689
+ "model.layers.77.self_attn.o_proj.weight": "model-00029-of-00030.safetensors",
690
+ "model.layers.77.self_attn.q_proj.weight": "model-00028-of-00030.safetensors",
691
+ "model.layers.77.self_attn.v_proj.weight": "model-00028-of-00030.safetensors",
692
+ "model.layers.78.input_layernorm.weight": "model-00029-of-00030.safetensors",
693
+ "model.layers.78.mlp.down_proj.weight": "model-00029-of-00030.safetensors",
694
+ "model.layers.78.mlp.gate_proj.weight": "model-00029-of-00030.safetensors",
695
+ "model.layers.78.mlp.up_proj.weight": "model-00029-of-00030.safetensors",
696
+ "model.layers.78.post_attention_layernorm.weight": "model-00029-of-00030.safetensors",
697
+ "model.layers.78.self_attn.k_proj.weight": "model-00029-of-00030.safetensors",
698
+ "model.layers.78.self_attn.o_proj.weight": "model-00029-of-00030.safetensors",
699
+ "model.layers.78.self_attn.q_proj.weight": "model-00029-of-00030.safetensors",
700
+ "model.layers.78.self_attn.v_proj.weight": "model-00029-of-00030.safetensors",
701
+ "model.layers.79.input_layernorm.weight": "model-00029-of-00030.safetensors",
702
+ "model.layers.79.mlp.down_proj.weight": "model-00029-of-00030.safetensors",
703
+ "model.layers.79.mlp.gate_proj.weight": "model-00029-of-00030.safetensors",
704
+ "model.layers.79.mlp.up_proj.weight": "model-00029-of-00030.safetensors",
705
+ "model.layers.79.post_attention_layernorm.weight": "model-00029-of-00030.safetensors",
706
+ "model.layers.79.self_attn.k_proj.weight": "model-00029-of-00030.safetensors",
707
+ "model.layers.79.self_attn.o_proj.weight": "model-00029-of-00030.safetensors",
708
+ "model.layers.79.self_attn.q_proj.weight": "model-00029-of-00030.safetensors",
709
+ "model.layers.79.self_attn.v_proj.weight": "model-00029-of-00030.safetensors",
710
+ "model.layers.8.input_layernorm.weight": "model-00004-of-00030.safetensors",
711
+ "model.layers.8.mlp.down_proj.weight": "model-00004-of-00030.safetensors",
712
+ "model.layers.8.mlp.gate_proj.weight": "model-00004-of-00030.safetensors",
713
+ "model.layers.8.mlp.up_proj.weight": "model-00004-of-00030.safetensors",
714
+ "model.layers.8.post_attention_layernorm.weight": "model-00004-of-00030.safetensors",
715
+ "model.layers.8.self_attn.k_proj.weight": "model-00004-of-00030.safetensors",
716
+ "model.layers.8.self_attn.o_proj.weight": "model-00004-of-00030.safetensors",
717
+ "model.layers.8.self_attn.q_proj.weight": "model-00004-of-00030.safetensors",
718
+ "model.layers.8.self_attn.v_proj.weight": "model-00004-of-00030.safetensors",
719
+ "model.layers.9.input_layernorm.weight": "model-00004-of-00030.safetensors",
720
+ "model.layers.9.mlp.down_proj.weight": "model-00004-of-00030.safetensors",
721
+ "model.layers.9.mlp.gate_proj.weight": "model-00004-of-00030.safetensors",
722
+ "model.layers.9.mlp.up_proj.weight": "model-00004-of-00030.safetensors",
723
+ "model.layers.9.post_attention_layernorm.weight": "model-00004-of-00030.safetensors",
724
+ "model.layers.9.self_attn.k_proj.weight": "model-00004-of-00030.safetensors",
725
+ "model.layers.9.self_attn.o_proj.weight": "model-00004-of-00030.safetensors",
726
+ "model.layers.9.self_attn.q_proj.weight": "model-00004-of-00030.safetensors",
727
+ "model.layers.9.self_attn.v_proj.weight": "model-00004-of-00030.safetensors",
728
+ "model.norm.weight": "model-00029-of-00030.safetensors"
729
+ }
730
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin▁of▁sentence|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end▁of▁sentence|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|end▁of▁sentence|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9c9eb63a8e03059914880f918cd28a880dec8b6e15e4461e1ff677e3743dbb8
3
+ size 9084480
tokenizer_config.json ADDED
@@ -0,0 +1,2067 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "128000": {
7
+ "content": "<|begin▁of▁sentence|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "128001": {
15
+ "content": "<|end▁of▁sentence|>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "128002": {
23
+ "content": "<|reserved_special_token_0|>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "128003": {
31
+ "content": "<|reserved_special_token_1|>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "128004": {
39
+ "content": "<|finetune_right_pad_id|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": true
45
+ },
46
+ "128005": {
47
+ "content": "<|reserved_special_token_2|>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": false,
51
+ "single_word": false,
52
+ "special": true
53
+ },
54
+ "128006": {
55
+ "content": "<|start_header_id|>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": false,
59
+ "single_word": false,
60
+ "special": true
61
+ },
62
+ "128007": {
63
+ "content": "<|end_header_id|>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": false,
67
+ "single_word": false,
68
+ "special": true
69
+ },
70
+ "128008": {
71
+ "content": "<|eom_id|>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": false,
75
+ "single_word": false,
76
+ "special": true
77
+ },
78
+ "128009": {
79
+ "content": "<|eot_id|>",
80
+ "lstrip": false,
81
+ "normalized": false,
82
+ "rstrip": false,
83
+ "single_word": false,
84
+ "special": true
85
+ },
86
+ "128010": {
87
+ "content": "<|python_tag|>",
88
+ "lstrip": false,
89
+ "normalized": false,
90
+ "rstrip": false,
91
+ "single_word": false,
92
+ "special": true
93
+ },
94
+ "128011": {
95
+ "content": "<|User|>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false,
100
+ "special": false
101
+ },
102
+ "128012": {
103
+ "content": "<|Assistant|>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": false,
107
+ "single_word": false,
108
+ "special": false
109
+ },
110
+ "128013": {
111
+ "content": "<think>",
112
+ "lstrip": false,
113
+ "normalized": false,
114
+ "rstrip": false,
115
+ "single_word": false,
116
+ "special": false
117
+ },
118
+ "128014": {
119
+ "content": "</think>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false,
124
+ "special": false
125
+ },
126
+ "128015": {
127
+ "content": "<|▁pad▁|>",
128
+ "lstrip": false,
129
+ "normalized": false,
130
+ "rstrip": false,
131
+ "single_word": false,
132
+ "special": true
133
+ },
134
+ "128016": {
135
+ "content": "<|reserved_special_token_8|>",
136
+ "lstrip": false,
137
+ "normalized": false,
138
+ "rstrip": false,
139
+ "single_word": false,
140
+ "special": true
141
+ },
142
+ "128017": {
143
+ "content": "<|reserved_special_token_9|>",
144
+ "lstrip": false,
145
+ "normalized": false,
146
+ "rstrip": false,
147
+ "single_word": false,
148
+ "special": true
149
+ },
150
+ "128018": {
151
+ "content": "<|reserved_special_token_10|>",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false,
156
+ "special": true
157
+ },
158
+ "128019": {
159
+ "content": "<|reserved_special_token_11|>",
160
+ "lstrip": false,
161
+ "normalized": false,
162
+ "rstrip": false,
163
+ "single_word": false,
164
+ "special": true
165
+ },
166
+ "128020": {
167
+ "content": "<|reserved_special_token_12|>",
168
+ "lstrip": false,
169
+ "normalized": false,
170
+ "rstrip": false,
171
+ "single_word": false,
172
+ "special": true
173
+ },
174
+ "128021": {
175
+ "content": "<|reserved_special_token_13|>",
176
+ "lstrip": false,
177
+ "normalized": false,
178
+ "rstrip": false,
179
+ "single_word": false,
180
+ "special": true
181
+ },
182
+ "128022": {
183
+ "content": "<|reserved_special_token_14|>",
184
+ "lstrip": false,
185
+ "normalized": false,
186
+ "rstrip": false,
187
+ "single_word": false,
188
+ "special": true
189
+ },
190
+ "128023": {
191
+ "content": "<|reserved_special_token_15|>",
192
+ "lstrip": false,
193
+ "normalized": false,
194
+ "rstrip": false,
195
+ "single_word": false,
196
+ "special": true
197
+ },
198
+ "128024": {
199
+ "content": "<|reserved_special_token_16|>",
200
+ "lstrip": false,
201
+ "normalized": false,
202
+ "rstrip": false,
203
+ "single_word": false,
204
+ "special": true
205
+ },
206
+ "128025": {
207
+ "content": "<|reserved_special_token_17|>",
208
+ "lstrip": false,
209
+ "normalized": false,
210
+ "rstrip": false,
211
+ "single_word": false,
212
+ "special": true
213
+ },
214
+ "128026": {
215
+ "content": "<|reserved_special_token_18|>",
216
+ "lstrip": false,
217
+ "normalized": false,
218
+ "rstrip": false,
219
+ "single_word": false,
220
+ "special": true
221
+ },
222
+ "128027": {
223
+ "content": "<|reserved_special_token_19|>",
224
+ "lstrip": false,
225
+ "normalized": false,
226
+ "rstrip": false,
227
+ "single_word": false,
228
+ "special": true
229
+ },
230
+ "128028": {
231
+ "content": "<|reserved_special_token_20|>",
232
+ "lstrip": false,
233
+ "normalized": false,
234
+ "rstrip": false,
235
+ "single_word": false,
236
+ "special": true
237
+ },
238
+ "128029": {
239
+ "content": "<|reserved_special_token_21|>",
240
+ "lstrip": false,
241
+ "normalized": false,
242
+ "rstrip": false,
243
+ "single_word": false,
244
+ "special": true
245
+ },
246
+ "128030": {
247
+ "content": "<|reserved_special_token_22|>",
248
+ "lstrip": false,
249
+ "normalized": false,
250
+ "rstrip": false,
251
+ "single_word": false,
252
+ "special": true
253
+ },
254
+ "128031": {
255
+ "content": "<|reserved_special_token_23|>",
256
+ "lstrip": false,
257
+ "normalized": false,
258
+ "rstrip": false,
259
+ "single_word": false,
260
+ "special": true
261
+ },
262
+ "128032": {
263
+ "content": "<|reserved_special_token_24|>",
264
+ "lstrip": false,
265
+ "normalized": false,
266
+ "rstrip": false,
267
+ "single_word": false,
268
+ "special": true
269
+ },
270
+ "128033": {
271
+ "content": "<|reserved_special_token_25|>",
272
+ "lstrip": false,
273
+ "normalized": false,
274
+ "rstrip": false,
275
+ "single_word": false,
276
+ "special": true
277
+ },
278
+ "128034": {
279
+ "content": "<|reserved_special_token_26|>",
280
+ "lstrip": false,
281
+ "normalized": false,
282
+ "rstrip": false,
283
+ "single_word": false,
284
+ "special": true
285
+ },
286
+ "128035": {
287
+ "content": "<|reserved_special_token_27|>",
288
+ "lstrip": false,
289
+ "normalized": false,
290
+ "rstrip": false,
291
+ "single_word": false,
292
+ "special": true
293
+ },
294
+ "128036": {
295
+ "content": "<|reserved_special_token_28|>",
296
+ "lstrip": false,
297
+ "normalized": false,
298
+ "rstrip": false,
299
+ "single_word": false,
300
+ "special": true
301
+ },
302
+ "128037": {
303
+ "content": "<|reserved_special_token_29|>",
304
+ "lstrip": false,
305
+ "normalized": false,
306
+ "rstrip": false,
307
+ "single_word": false,
308
+ "special": true
309
+ },
310
+ "128038": {
311
+ "content": "<|reserved_special_token_30|>",
312
+ "lstrip": false,
313
+ "normalized": false,
314
+ "rstrip": false,
315
+ "single_word": false,
316
+ "special": true
317
+ },
318
+ "128039": {
319
+ "content": "<|reserved_special_token_31|>",
320
+ "lstrip": false,
321
+ "normalized": false,
322
+ "rstrip": false,
323
+ "single_word": false,
324
+ "special": true
325
+ },
326
+ "128040": {
327
+ "content": "<|reserved_special_token_32|>",
328
+ "lstrip": false,
329
+ "normalized": false,
330
+ "rstrip": false,
331
+ "single_word": false,
332
+ "special": true
333
+ },
334
+ "128041": {
335
+ "content": "<|reserved_special_token_33|>",
336
+ "lstrip": false,
337
+ "normalized": false,
338
+ "rstrip": false,
339
+ "single_word": false,
340
+ "special": true
341
+ },
342
+ "128042": {
343
+ "content": "<|reserved_special_token_34|>",
344
+ "lstrip": false,
345
+ "normalized": false,
346
+ "rstrip": false,
347
+ "single_word": false,
348
+ "special": true
349
+ },
350
+ "128043": {
351
+ "content": "<|reserved_special_token_35|>",
352
+ "lstrip": false,
353
+ "normalized": false,
354
+ "rstrip": false,
355
+ "single_word": false,
356
+ "special": true
357
+ },
358
+ "128044": {
359
+ "content": "<|reserved_special_token_36|>",
360
+ "lstrip": false,
361
+ "normalized": false,
362
+ "rstrip": false,
363
+ "single_word": false,
364
+ "special": true
365
+ },
366
+ "128045": {
367
+ "content": "<|reserved_special_token_37|>",
368
+ "lstrip": false,
369
+ "normalized": false,
370
+ "rstrip": false,
371
+ "single_word": false,
372
+ "special": true
373
+ },
374
+ "128046": {
375
+ "content": "<|reserved_special_token_38|>",
376
+ "lstrip": false,
377
+ "normalized": false,
378
+ "rstrip": false,
379
+ "single_word": false,
380
+ "special": true
381
+ },
382
+ "128047": {
383
+ "content": "<|reserved_special_token_39|>",
384
+ "lstrip": false,
385
+ "normalized": false,
386
+ "rstrip": false,
387
+ "single_word": false,
388
+ "special": true
389
+ },
390
+ "128048": {
391
+ "content": "<|reserved_special_token_40|>",
392
+ "lstrip": false,
393
+ "normalized": false,
394
+ "rstrip": false,
395
+ "single_word": false,
396
+ "special": true
397
+ },
398
+ "128049": {
399
+ "content": "<|reserved_special_token_41|>",
400
+ "lstrip": false,
401
+ "normalized": false,
402
+ "rstrip": false,
403
+ "single_word": false,
404
+ "special": true
405
+ },
406
+ "128050": {
407
+ "content": "<|reserved_special_token_42|>",
408
+ "lstrip": false,
409
+ "normalized": false,
410
+ "rstrip": false,
411
+ "single_word": false,
412
+ "special": true
413
+ },
414
+ "128051": {
415
+ "content": "<|reserved_special_token_43|>",
416
+ "lstrip": false,
417
+ "normalized": false,
418
+ "rstrip": false,
419
+ "single_word": false,
420
+ "special": true
421
+ },
422
+ "128052": {
423
+ "content": "<|reserved_special_token_44|>",
424
+ "lstrip": false,
425
+ "normalized": false,
426
+ "rstrip": false,
427
+ "single_word": false,
428
+ "special": true
429
+ },
430
+ "128053": {
431
+ "content": "<|reserved_special_token_45|>",
432
+ "lstrip": false,
433
+ "normalized": false,
434
+ "rstrip": false,
435
+ "single_word": false,
436
+ "special": true
437
+ },
438
+ "128054": {
439
+ "content": "<|reserved_special_token_46|>",
440
+ "lstrip": false,
441
+ "normalized": false,
442
+ "rstrip": false,
443
+ "single_word": false,
444
+ "special": true
445
+ },
446
+ "128055": {
447
+ "content": "<|reserved_special_token_47|>",
448
+ "lstrip": false,
449
+ "normalized": false,
450
+ "rstrip": false,
451
+ "single_word": false,
452
+ "special": true
453
+ },
454
+ "128056": {
455
+ "content": "<|reserved_special_token_48|>",
456
+ "lstrip": false,
457
+ "normalized": false,
458
+ "rstrip": false,
459
+ "single_word": false,
460
+ "special": true
461
+ },
462
+ "128057": {
463
+ "content": "<|reserved_special_token_49|>",
464
+ "lstrip": false,
465
+ "normalized": false,
466
+ "rstrip": false,
467
+ "single_word": false,
468
+ "special": true
469
+ },
470
+ "128058": {
471
+ "content": "<|reserved_special_token_50|>",
472
+ "lstrip": false,
473
+ "normalized": false,
474
+ "rstrip": false,
475
+ "single_word": false,
476
+ "special": true
477
+ },
478
+ "128059": {
479
+ "content": "<|reserved_special_token_51|>",
480
+ "lstrip": false,
481
+ "normalized": false,
482
+ "rstrip": false,
483
+ "single_word": false,
484
+ "special": true
485
+ },
486
+ "128060": {
487
+ "content": "<|reserved_special_token_52|>",
488
+ "lstrip": false,
489
+ "normalized": false,
490
+ "rstrip": false,
491
+ "single_word": false,
492
+ "special": true
493
+ },
494
+ "128061": {
495
+ "content": "<|reserved_special_token_53|>",
496
+ "lstrip": false,
497
+ "normalized": false,
498
+ "rstrip": false,
499
+ "single_word": false,
500
+ "special": true
501
+ },
502
+ "128062": {
503
+ "content": "<|reserved_special_token_54|>",
504
+ "lstrip": false,
505
+ "normalized": false,
506
+ "rstrip": false,
507
+ "single_word": false,
508
+ "special": true
509
+ },
510
+ "128063": {
511
+ "content": "<|reserved_special_token_55|>",
512
+ "lstrip": false,
513
+ "normalized": false,
514
+ "rstrip": false,
515
+ "single_word": false,
516
+ "special": true
517
+ },
518
+ "128064": {
519
+ "content": "<|reserved_special_token_56|>",
520
+ "lstrip": false,
521
+ "normalized": false,
522
+ "rstrip": false,
523
+ "single_word": false,
524
+ "special": true
525
+ },
526
+ "128065": {
527
+ "content": "<|reserved_special_token_57|>",
528
+ "lstrip": false,
529
+ "normalized": false,
530
+ "rstrip": false,
531
+ "single_word": false,
532
+ "special": true
533
+ },
534
+ "128066": {
535
+ "content": "<|reserved_special_token_58|>",
536
+ "lstrip": false,
537
+ "normalized": false,
538
+ "rstrip": false,
539
+ "single_word": false,
540
+ "special": true
541
+ },
542
+ "128067": {
543
+ "content": "<|reserved_special_token_59|>",
544
+ "lstrip": false,
545
+ "normalized": false,
546
+ "rstrip": false,
547
+ "single_word": false,
548
+ "special": true
549
+ },
550
+ "128068": {
551
+ "content": "<|reserved_special_token_60|>",
552
+ "lstrip": false,
553
+ "normalized": false,
554
+ "rstrip": false,
555
+ "single_word": false,
556
+ "special": true
557
+ },
558
+ "128069": {
559
+ "content": "<|reserved_special_token_61|>",
560
+ "lstrip": false,
561
+ "normalized": false,
562
+ "rstrip": false,
563
+ "single_word": false,
564
+ "special": true
565
+ },
566
+ "128070": {
567
+ "content": "<|reserved_special_token_62|>",
568
+ "lstrip": false,
569
+ "normalized": false,
570
+ "rstrip": false,
571
+ "single_word": false,
572
+ "special": true
573
+ },
574
+ "128071": {
575
+ "content": "<|reserved_special_token_63|>",
576
+ "lstrip": false,
577
+ "normalized": false,
578
+ "rstrip": false,
579
+ "single_word": false,
580
+ "special": true
581
+ },
582
+ "128072": {
583
+ "content": "<|reserved_special_token_64|>",
584
+ "lstrip": false,
585
+ "normalized": false,
586
+ "rstrip": false,
587
+ "single_word": false,
588
+ "special": true
589
+ },
590
+ "128073": {
591
+ "content": "<|reserved_special_token_65|>",
592
+ "lstrip": false,
593
+ "normalized": false,
594
+ "rstrip": false,
595
+ "single_word": false,
596
+ "special": true
597
+ },
598
+ "128074": {
599
+ "content": "<|reserved_special_token_66|>",
600
+ "lstrip": false,
601
+ "normalized": false,
602
+ "rstrip": false,
603
+ "single_word": false,
604
+ "special": true
605
+ },
606
+ "128075": {
607
+ "content": "<|reserved_special_token_67|>",
608
+ "lstrip": false,
609
+ "normalized": false,
610
+ "rstrip": false,
611
+ "single_word": false,
612
+ "special": true
613
+ },
614
+ "128076": {
615
+ "content": "<|reserved_special_token_68|>",
616
+ "lstrip": false,
617
+ "normalized": false,
618
+ "rstrip": false,
619
+ "single_word": false,
620
+ "special": true
621
+ },
622
+ "128077": {
623
+ "content": "<|reserved_special_token_69|>",
624
+ "lstrip": false,
625
+ "normalized": false,
626
+ "rstrip": false,
627
+ "single_word": false,
628
+ "special": true
629
+ },
630
+ "128078": {
631
+ "content": "<|reserved_special_token_70|>",
632
+ "lstrip": false,
633
+ "normalized": false,
634
+ "rstrip": false,
635
+ "single_word": false,
636
+ "special": true
637
+ },
638
+ "128079": {
639
+ "content": "<|reserved_special_token_71|>",
640
+ "lstrip": false,
641
+ "normalized": false,
642
+ "rstrip": false,
643
+ "single_word": false,
644
+ "special": true
645
+ },
646
+ "128080": {
647
+ "content": "<|reserved_special_token_72|>",
648
+ "lstrip": false,
649
+ "normalized": false,
650
+ "rstrip": false,
651
+ "single_word": false,
652
+ "special": true
653
+ },
654
+ "128081": {
655
+ "content": "<|reserved_special_token_73|>",
656
+ "lstrip": false,
657
+ "normalized": false,
658
+ "rstrip": false,
659
+ "single_word": false,
660
+ "special": true
661
+ },
662
+ "128082": {
663
+ "content": "<|reserved_special_token_74|>",
664
+ "lstrip": false,
665
+ "normalized": false,
666
+ "rstrip": false,
667
+ "single_word": false,
668
+ "special": true
669
+ },
670
+ "128083": {
671
+ "content": "<|reserved_special_token_75|>",
672
+ "lstrip": false,
673
+ "normalized": false,
674
+ "rstrip": false,
675
+ "single_word": false,
676
+ "special": true
677
+ },
678
+ "128084": {
679
+ "content": "<|reserved_special_token_76|>",
680
+ "lstrip": false,
681
+ "normalized": false,
682
+ "rstrip": false,
683
+ "single_word": false,
684
+ "special": true
685
+ },
686
+ "128085": {
687
+ "content": "<|reserved_special_token_77|>",
688
+ "lstrip": false,
689
+ "normalized": false,
690
+ "rstrip": false,
691
+ "single_word": false,
692
+ "special": true
693
+ },
694
+ "128086": {
695
+ "content": "<|reserved_special_token_78|>",
696
+ "lstrip": false,
697
+ "normalized": false,
698
+ "rstrip": false,
699
+ "single_word": false,
700
+ "special": true
701
+ },
702
+ "128087": {
703
+ "content": "<|reserved_special_token_79|>",
704
+ "lstrip": false,
705
+ "normalized": false,
706
+ "rstrip": false,
707
+ "single_word": false,
708
+ "special": true
709
+ },
710
+ "128088": {
711
+ "content": "<|reserved_special_token_80|>",
712
+ "lstrip": false,
713
+ "normalized": false,
714
+ "rstrip": false,
715
+ "single_word": false,
716
+ "special": true
717
+ },
718
+ "128089": {
719
+ "content": "<|reserved_special_token_81|>",
720
+ "lstrip": false,
721
+ "normalized": false,
722
+ "rstrip": false,
723
+ "single_word": false,
724
+ "special": true
725
+ },
726
+ "128090": {
727
+ "content": "<|reserved_special_token_82|>",
728
+ "lstrip": false,
729
+ "normalized": false,
730
+ "rstrip": false,
731
+ "single_word": false,
732
+ "special": true
733
+ },
734
+ "128091": {
735
+ "content": "<|reserved_special_token_83|>",
736
+ "lstrip": false,
737
+ "normalized": false,
738
+ "rstrip": false,
739
+ "single_word": false,
740
+ "special": true
741
+ },
742
+ "128092": {
743
+ "content": "<|reserved_special_token_84|>",
744
+ "lstrip": false,
745
+ "normalized": false,
746
+ "rstrip": false,
747
+ "single_word": false,
748
+ "special": true
749
+ },
750
+ "128093": {
751
+ "content": "<|reserved_special_token_85|>",
752
+ "lstrip": false,
753
+ "normalized": false,
754
+ "rstrip": false,
755
+ "single_word": false,
756
+ "special": true
757
+ },
758
+ "128094": {
759
+ "content": "<|reserved_special_token_86|>",
760
+ "lstrip": false,
761
+ "normalized": false,
762
+ "rstrip": false,
763
+ "single_word": false,
764
+ "special": true
765
+ },
766
+ "128095": {
767
+ "content": "<|reserved_special_token_87|>",
768
+ "lstrip": false,
769
+ "normalized": false,
770
+ "rstrip": false,
771
+ "single_word": false,
772
+ "special": true
773
+ },
774
+ "128096": {
775
+ "content": "<|reserved_special_token_88|>",
776
+ "lstrip": false,
777
+ "normalized": false,
778
+ "rstrip": false,
779
+ "single_word": false,
780
+ "special": true
781
+ },
782
+ "128097": {
783
+ "content": "<|reserved_special_token_89|>",
784
+ "lstrip": false,
785
+ "normalized": false,
786
+ "rstrip": false,
787
+ "single_word": false,
788
+ "special": true
789
+ },
790
+ "128098": {
791
+ "content": "<|reserved_special_token_90|>",
792
+ "lstrip": false,
793
+ "normalized": false,
794
+ "rstrip": false,
795
+ "single_word": false,
796
+ "special": true
797
+ },
798
+ "128099": {
799
+ "content": "<|reserved_special_token_91|>",
800
+ "lstrip": false,
801
+ "normalized": false,
802
+ "rstrip": false,
803
+ "single_word": false,
804
+ "special": true
805
+ },
806
+ "128100": {
807
+ "content": "<|reserved_special_token_92|>",
808
+ "lstrip": false,
809
+ "normalized": false,
810
+ "rstrip": false,
811
+ "single_word": false,
812
+ "special": true
813
+ },
814
+ "128101": {
815
+ "content": "<|reserved_special_token_93|>",
816
+ "lstrip": false,
817
+ "normalized": false,
818
+ "rstrip": false,
819
+ "single_word": false,
820
+ "special": true
821
+ },
822
+ "128102": {
823
+ "content": "<|reserved_special_token_94|>",
824
+ "lstrip": false,
825
+ "normalized": false,
826
+ "rstrip": false,
827
+ "single_word": false,
828
+ "special": true
829
+ },
830
+ "128103": {
831
+ "content": "<|reserved_special_token_95|>",
832
+ "lstrip": false,
833
+ "normalized": false,
834
+ "rstrip": false,
835
+ "single_word": false,
836
+ "special": true
837
+ },
838
+ "128104": {
839
+ "content": "<|reserved_special_token_96|>",
840
+ "lstrip": false,
841
+ "normalized": false,
842
+ "rstrip": false,
843
+ "single_word": false,
844
+ "special": true
845
+ },
846
+ "128105": {
847
+ "content": "<|reserved_special_token_97|>",
848
+ "lstrip": false,
849
+ "normalized": false,
850
+ "rstrip": false,
851
+ "single_word": false,
852
+ "special": true
853
+ },
854
+ "128106": {
855
+ "content": "<|reserved_special_token_98|>",
856
+ "lstrip": false,
857
+ "normalized": false,
858
+ "rstrip": false,
859
+ "single_word": false,
860
+ "special": true
861
+ },
862
+ "128107": {
863
+ "content": "<|reserved_special_token_99|>",
864
+ "lstrip": false,
865
+ "normalized": false,
866
+ "rstrip": false,
867
+ "single_word": false,
868
+ "special": true
869
+ },
870
+ "128108": {
871
+ "content": "<|reserved_special_token_100|>",
872
+ "lstrip": false,
873
+ "normalized": false,
874
+ "rstrip": false,
875
+ "single_word": false,
876
+ "special": true
877
+ },
878
+ "128109": {
879
+ "content": "<|reserved_special_token_101|>",
880
+ "lstrip": false,
881
+ "normalized": false,
882
+ "rstrip": false,
883
+ "single_word": false,
884
+ "special": true
885
+ },
886
+ "128110": {
887
+ "content": "<|reserved_special_token_102|>",
888
+ "lstrip": false,
889
+ "normalized": false,
890
+ "rstrip": false,
891
+ "single_word": false,
892
+ "special": true
893
+ },
894
+ "128111": {
895
+ "content": "<|reserved_special_token_103|>",
896
+ "lstrip": false,
897
+ "normalized": false,
898
+ "rstrip": false,
899
+ "single_word": false,
900
+ "special": true
901
+ },
902
+ "128112": {
903
+ "content": "<|reserved_special_token_104|>",
904
+ "lstrip": false,
905
+ "normalized": false,
906
+ "rstrip": false,
907
+ "single_word": false,
908
+ "special": true
909
+ },
910
+ "128113": {
911
+ "content": "<|reserved_special_token_105|>",
912
+ "lstrip": false,
913
+ "normalized": false,
914
+ "rstrip": false,
915
+ "single_word": false,
916
+ "special": true
917
+ },
918
+ "128114": {
919
+ "content": "<|reserved_special_token_106|>",
920
+ "lstrip": false,
921
+ "normalized": false,
922
+ "rstrip": false,
923
+ "single_word": false,
924
+ "special": true
925
+ },
926
+ "128115": {
927
+ "content": "<|reserved_special_token_107|>",
928
+ "lstrip": false,
929
+ "normalized": false,
930
+ "rstrip": false,
931
+ "single_word": false,
932
+ "special": true
933
+ },
934
+ "128116": {
935
+ "content": "<|reserved_special_token_108|>",
936
+ "lstrip": false,
937
+ "normalized": false,
938
+ "rstrip": false,
939
+ "single_word": false,
940
+ "special": true
941
+ },
942
+ "128117": {
943
+ "content": "<|reserved_special_token_109|>",
944
+ "lstrip": false,
945
+ "normalized": false,
946
+ "rstrip": false,
947
+ "single_word": false,
948
+ "special": true
949
+ },
950
+ "128118": {
951
+ "content": "<|reserved_special_token_110|>",
952
+ "lstrip": false,
953
+ "normalized": false,
954
+ "rstrip": false,
955
+ "single_word": false,
956
+ "special": true
957
+ },
958
+ "128119": {
959
+ "content": "<|reserved_special_token_111|>",
960
+ "lstrip": false,
961
+ "normalized": false,
962
+ "rstrip": false,
963
+ "single_word": false,
964
+ "special": true
965
+ },
966
+ "128120": {
967
+ "content": "<|reserved_special_token_112|>",
968
+ "lstrip": false,
969
+ "normalized": false,
970
+ "rstrip": false,
971
+ "single_word": false,
972
+ "special": true
973
+ },
974
+ "128121": {
975
+ "content": "<|reserved_special_token_113|>",
976
+ "lstrip": false,
977
+ "normalized": false,
978
+ "rstrip": false,
979
+ "single_word": false,
980
+ "special": true
981
+ },
982
+ "128122": {
983
+ "content": "<|reserved_special_token_114|>",
984
+ "lstrip": false,
985
+ "normalized": false,
986
+ "rstrip": false,
987
+ "single_word": false,
988
+ "special": true
989
+ },
990
+ "128123": {
991
+ "content": "<|reserved_special_token_115|>",
992
+ "lstrip": false,
993
+ "normalized": false,
994
+ "rstrip": false,
995
+ "single_word": false,
996
+ "special": true
997
+ },
998
+ "128124": {
999
+ "content": "<|reserved_special_token_116|>",
1000
+ "lstrip": false,
1001
+ "normalized": false,
1002
+ "rstrip": false,
1003
+ "single_word": false,
1004
+ "special": true
1005
+ },
1006
+ "128125": {
1007
+ "content": "<|reserved_special_token_117|>",
1008
+ "lstrip": false,
1009
+ "normalized": false,
1010
+ "rstrip": false,
1011
+ "single_word": false,
1012
+ "special": true
1013
+ },
1014
+ "128126": {
1015
+ "content": "<|reserved_special_token_118|>",
1016
+ "lstrip": false,
1017
+ "normalized": false,
1018
+ "rstrip": false,
1019
+ "single_word": false,
1020
+ "special": true
1021
+ },
1022
+ "128127": {
1023
+ "content": "<|reserved_special_token_119|>",
1024
+ "lstrip": false,
1025
+ "normalized": false,
1026
+ "rstrip": false,
1027
+ "single_word": false,
1028
+ "special": true
1029
+ },
1030
+ "128128": {
1031
+ "content": "<|reserved_special_token_120|>",
1032
+ "lstrip": false,
1033
+ "normalized": false,
1034
+ "rstrip": false,
1035
+ "single_word": false,
1036
+ "special": true
1037
+ },
1038
+ "128129": {
1039
+ "content": "<|reserved_special_token_121|>",
1040
+ "lstrip": false,
1041
+ "normalized": false,
1042
+ "rstrip": false,
1043
+ "single_word": false,
1044
+ "special": true
1045
+ },
1046
+ "128130": {
1047
+ "content": "<|reserved_special_token_122|>",
1048
+ "lstrip": false,
1049
+ "normalized": false,
1050
+ "rstrip": false,
1051
+ "single_word": false,
1052
+ "special": true
1053
+ },
1054
+ "128131": {
1055
+ "content": "<|reserved_special_token_123|>",
1056
+ "lstrip": false,
1057
+ "normalized": false,
1058
+ "rstrip": false,
1059
+ "single_word": false,
1060
+ "special": true
1061
+ },
1062
+ "128132": {
1063
+ "content": "<|reserved_special_token_124|>",
1064
+ "lstrip": false,
1065
+ "normalized": false,
1066
+ "rstrip": false,
1067
+ "single_word": false,
1068
+ "special": true
1069
+ },
1070
+ "128133": {
1071
+ "content": "<|reserved_special_token_125|>",
1072
+ "lstrip": false,
1073
+ "normalized": false,
1074
+ "rstrip": false,
1075
+ "single_word": false,
1076
+ "special": true
1077
+ },
1078
+ "128134": {
1079
+ "content": "<|reserved_special_token_126|>",
1080
+ "lstrip": false,
1081
+ "normalized": false,
1082
+ "rstrip": false,
1083
+ "single_word": false,
1084
+ "special": true
1085
+ },
1086
+ "128135": {
1087
+ "content": "<|reserved_special_token_127|>",
1088
+ "lstrip": false,
1089
+ "normalized": false,
1090
+ "rstrip": false,
1091
+ "single_word": false,
1092
+ "special": true
1093
+ },
1094
+ "128136": {
1095
+ "content": "<|reserved_special_token_128|>",
1096
+ "lstrip": false,
1097
+ "normalized": false,
1098
+ "rstrip": false,
1099
+ "single_word": false,
1100
+ "special": true
1101
+ },
1102
+ "128137": {
1103
+ "content": "<|reserved_special_token_129|>",
1104
+ "lstrip": false,
1105
+ "normalized": false,
1106
+ "rstrip": false,
1107
+ "single_word": false,
1108
+ "special": true
1109
+ },
1110
+ "128138": {
1111
+ "content": "<|reserved_special_token_130|>",
1112
+ "lstrip": false,
1113
+ "normalized": false,
1114
+ "rstrip": false,
1115
+ "single_word": false,
1116
+ "special": true
1117
+ },
1118
+ "128139": {
1119
+ "content": "<|reserved_special_token_131|>",
1120
+ "lstrip": false,
1121
+ "normalized": false,
1122
+ "rstrip": false,
1123
+ "single_word": false,
1124
+ "special": true
1125
+ },
1126
+ "128140": {
1127
+ "content": "<|reserved_special_token_132|>",
1128
+ "lstrip": false,
1129
+ "normalized": false,
1130
+ "rstrip": false,
1131
+ "single_word": false,
1132
+ "special": true
1133
+ },
1134
+ "128141": {
1135
+ "content": "<|reserved_special_token_133|>",
1136
+ "lstrip": false,
1137
+ "normalized": false,
1138
+ "rstrip": false,
1139
+ "single_word": false,
1140
+ "special": true
1141
+ },
1142
+ "128142": {
1143
+ "content": "<|reserved_special_token_134|>",
1144
+ "lstrip": false,
1145
+ "normalized": false,
1146
+ "rstrip": false,
1147
+ "single_word": false,
1148
+ "special": true
1149
+ },
1150
+ "128143": {
1151
+ "content": "<|reserved_special_token_135|>",
1152
+ "lstrip": false,
1153
+ "normalized": false,
1154
+ "rstrip": false,
1155
+ "single_word": false,
1156
+ "special": true
1157
+ },
1158
+ "128144": {
1159
+ "content": "<|reserved_special_token_136|>",
1160
+ "lstrip": false,
1161
+ "normalized": false,
1162
+ "rstrip": false,
1163
+ "single_word": false,
1164
+ "special": true
1165
+ },
1166
+ "128145": {
1167
+ "content": "<|reserved_special_token_137|>",
1168
+ "lstrip": false,
1169
+ "normalized": false,
1170
+ "rstrip": false,
1171
+ "single_word": false,
1172
+ "special": true
1173
+ },
1174
+ "128146": {
1175
+ "content": "<|reserved_special_token_138|>",
1176
+ "lstrip": false,
1177
+ "normalized": false,
1178
+ "rstrip": false,
1179
+ "single_word": false,
1180
+ "special": true
1181
+ },
1182
+ "128147": {
1183
+ "content": "<|reserved_special_token_139|>",
1184
+ "lstrip": false,
1185
+ "normalized": false,
1186
+ "rstrip": false,
1187
+ "single_word": false,
1188
+ "special": true
1189
+ },
1190
+ "128148": {
1191
+ "content": "<|reserved_special_token_140|>",
1192
+ "lstrip": false,
1193
+ "normalized": false,
1194
+ "rstrip": false,
1195
+ "single_word": false,
1196
+ "special": true
1197
+ },
1198
+ "128149": {
1199
+ "content": "<|reserved_special_token_141|>",
1200
+ "lstrip": false,
1201
+ "normalized": false,
1202
+ "rstrip": false,
1203
+ "single_word": false,
1204
+ "special": true
1205
+ },
1206
+ "128150": {
1207
+ "content": "<|reserved_special_token_142|>",
1208
+ "lstrip": false,
1209
+ "normalized": false,
1210
+ "rstrip": false,
1211
+ "single_word": false,
1212
+ "special": true
1213
+ },
1214
+ "128151": {
1215
+ "content": "<|reserved_special_token_143|>",
1216
+ "lstrip": false,
1217
+ "normalized": false,
1218
+ "rstrip": false,
1219
+ "single_word": false,
1220
+ "special": true
1221
+ },
1222
+ "128152": {
1223
+ "content": "<|reserved_special_token_144|>",
1224
+ "lstrip": false,
1225
+ "normalized": false,
1226
+ "rstrip": false,
1227
+ "single_word": false,
1228
+ "special": true
1229
+ },
1230
+ "128153": {
1231
+ "content": "<|reserved_special_token_145|>",
1232
+ "lstrip": false,
1233
+ "normalized": false,
1234
+ "rstrip": false,
1235
+ "single_word": false,
1236
+ "special": true
1237
+ },
1238
+ "128154": {
1239
+ "content": "<|reserved_special_token_146|>",
1240
+ "lstrip": false,
1241
+ "normalized": false,
1242
+ "rstrip": false,
1243
+ "single_word": false,
1244
+ "special": true
1245
+ },
1246
+ "128155": {
1247
+ "content": "<|reserved_special_token_147|>",
1248
+ "lstrip": false,
1249
+ "normalized": false,
1250
+ "rstrip": false,
1251
+ "single_word": false,
1252
+ "special": true
1253
+ },
1254
+ "128156": {
1255
+ "content": "<|reserved_special_token_148|>",
1256
+ "lstrip": false,
1257
+ "normalized": false,
1258
+ "rstrip": false,
1259
+ "single_word": false,
1260
+ "special": true
1261
+ },
1262
+ "128157": {
1263
+ "content": "<|reserved_special_token_149|>",
1264
+ "lstrip": false,
1265
+ "normalized": false,
1266
+ "rstrip": false,
1267
+ "single_word": false,
1268
+ "special": true
1269
+ },
1270
+ "128158": {
1271
+ "content": "<|reserved_special_token_150|>",
1272
+ "lstrip": false,
1273
+ "normalized": false,
1274
+ "rstrip": false,
1275
+ "single_word": false,
1276
+ "special": true
1277
+ },
1278
+ "128159": {
1279
+ "content": "<|reserved_special_token_151|>",
1280
+ "lstrip": false,
1281
+ "normalized": false,
1282
+ "rstrip": false,
1283
+ "single_word": false,
1284
+ "special": true
1285
+ },
1286
+ "128160": {
1287
+ "content": "<|reserved_special_token_152|>",
1288
+ "lstrip": false,
1289
+ "normalized": false,
1290
+ "rstrip": false,
1291
+ "single_word": false,
1292
+ "special": true
1293
+ },
1294
+ "128161": {
1295
+ "content": "<|reserved_special_token_153|>",
1296
+ "lstrip": false,
1297
+ "normalized": false,
1298
+ "rstrip": false,
1299
+ "single_word": false,
1300
+ "special": true
1301
+ },
1302
+ "128162": {
1303
+ "content": "<|reserved_special_token_154|>",
1304
+ "lstrip": false,
1305
+ "normalized": false,
1306
+ "rstrip": false,
1307
+ "single_word": false,
1308
+ "special": true
1309
+ },
1310
+ "128163": {
1311
+ "content": "<|reserved_special_token_155|>",
1312
+ "lstrip": false,
1313
+ "normalized": false,
1314
+ "rstrip": false,
1315
+ "single_word": false,
1316
+ "special": true
1317
+ },
1318
+ "128164": {
1319
+ "content": "<|reserved_special_token_156|>",
1320
+ "lstrip": false,
1321
+ "normalized": false,
1322
+ "rstrip": false,
1323
+ "single_word": false,
1324
+ "special": true
1325
+ },
1326
+ "128165": {
1327
+ "content": "<|reserved_special_token_157|>",
1328
+ "lstrip": false,
1329
+ "normalized": false,
1330
+ "rstrip": false,
1331
+ "single_word": false,
1332
+ "special": true
1333
+ },
1334
+ "128166": {
1335
+ "content": "<|reserved_special_token_158|>",
1336
+ "lstrip": false,
1337
+ "normalized": false,
1338
+ "rstrip": false,
1339
+ "single_word": false,
1340
+ "special": true
1341
+ },
1342
+ "128167": {
1343
+ "content": "<|reserved_special_token_159|>",
1344
+ "lstrip": false,
1345
+ "normalized": false,
1346
+ "rstrip": false,
1347
+ "single_word": false,
1348
+ "special": true
1349
+ },
1350
+ "128168": {
1351
+ "content": "<|reserved_special_token_160|>",
1352
+ "lstrip": false,
1353
+ "normalized": false,
1354
+ "rstrip": false,
1355
+ "single_word": false,
1356
+ "special": true
1357
+ },
1358
+ "128169": {
1359
+ "content": "<|reserved_special_token_161|>",
1360
+ "lstrip": false,
1361
+ "normalized": false,
1362
+ "rstrip": false,
1363
+ "single_word": false,
1364
+ "special": true
1365
+ },
1366
+ "128170": {
1367
+ "content": "<|reserved_special_token_162|>",
1368
+ "lstrip": false,
1369
+ "normalized": false,
1370
+ "rstrip": false,
1371
+ "single_word": false,
1372
+ "special": true
1373
+ },
1374
+ "128171": {
1375
+ "content": "<|reserved_special_token_163|>",
1376
+ "lstrip": false,
1377
+ "normalized": false,
1378
+ "rstrip": false,
1379
+ "single_word": false,
1380
+ "special": true
1381
+ },
1382
+ "128172": {
1383
+ "content": "<|reserved_special_token_164|>",
1384
+ "lstrip": false,
1385
+ "normalized": false,
1386
+ "rstrip": false,
1387
+ "single_word": false,
1388
+ "special": true
1389
+ },
1390
+ "128173": {
1391
+ "content": "<|reserved_special_token_165|>",
1392
+ "lstrip": false,
1393
+ "normalized": false,
1394
+ "rstrip": false,
1395
+ "single_word": false,
1396
+ "special": true
1397
+ },
1398
+ "128174": {
1399
+ "content": "<|reserved_special_token_166|>",
1400
+ "lstrip": false,
1401
+ "normalized": false,
1402
+ "rstrip": false,
1403
+ "single_word": false,
1404
+ "special": true
1405
+ },
1406
+ "128175": {
1407
+ "content": "<|reserved_special_token_167|>",
1408
+ "lstrip": false,
1409
+ "normalized": false,
1410
+ "rstrip": false,
1411
+ "single_word": false,
1412
+ "special": true
1413
+ },
1414
+ "128176": {
1415
+ "content": "<|reserved_special_token_168|>",
1416
+ "lstrip": false,
1417
+ "normalized": false,
1418
+ "rstrip": false,
1419
+ "single_word": false,
1420
+ "special": true
1421
+ },
1422
+ "128177": {
1423
+ "content": "<|reserved_special_token_169|>",
1424
+ "lstrip": false,
1425
+ "normalized": false,
1426
+ "rstrip": false,
1427
+ "single_word": false,
1428
+ "special": true
1429
+ },
1430
+ "128178": {
1431
+ "content": "<|reserved_special_token_170|>",
1432
+ "lstrip": false,
1433
+ "normalized": false,
1434
+ "rstrip": false,
1435
+ "single_word": false,
1436
+ "special": true
1437
+ },
1438
+ "128179": {
1439
+ "content": "<|reserved_special_token_171|>",
1440
+ "lstrip": false,
1441
+ "normalized": false,
1442
+ "rstrip": false,
1443
+ "single_word": false,
1444
+ "special": true
1445
+ },
1446
+ "128180": {
1447
+ "content": "<|reserved_special_token_172|>",
1448
+ "lstrip": false,
1449
+ "normalized": false,
1450
+ "rstrip": false,
1451
+ "single_word": false,
1452
+ "special": true
1453
+ },
1454
+ "128181": {
1455
+ "content": "<|reserved_special_token_173|>",
1456
+ "lstrip": false,
1457
+ "normalized": false,
1458
+ "rstrip": false,
1459
+ "single_word": false,
1460
+ "special": true
1461
+ },
1462
+ "128182": {
1463
+ "content": "<|reserved_special_token_174|>",
1464
+ "lstrip": false,
1465
+ "normalized": false,
1466
+ "rstrip": false,
1467
+ "single_word": false,
1468
+ "special": true
1469
+ },
1470
+ "128183": {
1471
+ "content": "<|reserved_special_token_175|>",
1472
+ "lstrip": false,
1473
+ "normalized": false,
1474
+ "rstrip": false,
1475
+ "single_word": false,
1476
+ "special": true
1477
+ },
1478
+ "128184": {
1479
+ "content": "<|reserved_special_token_176|>",
1480
+ "lstrip": false,
1481
+ "normalized": false,
1482
+ "rstrip": false,
1483
+ "single_word": false,
1484
+ "special": true
1485
+ },
1486
+ "128185": {
1487
+ "content": "<|reserved_special_token_177|>",
1488
+ "lstrip": false,
1489
+ "normalized": false,
1490
+ "rstrip": false,
1491
+ "single_word": false,
1492
+ "special": true
1493
+ },
1494
+ "128186": {
1495
+ "content": "<|reserved_special_token_178|>",
1496
+ "lstrip": false,
1497
+ "normalized": false,
1498
+ "rstrip": false,
1499
+ "single_word": false,
1500
+ "special": true
1501
+ },
1502
+ "128187": {
1503
+ "content": "<|reserved_special_token_179|>",
1504
+ "lstrip": false,
1505
+ "normalized": false,
1506
+ "rstrip": false,
1507
+ "single_word": false,
1508
+ "special": true
1509
+ },
1510
+ "128188": {
1511
+ "content": "<|reserved_special_token_180|>",
1512
+ "lstrip": false,
1513
+ "normalized": false,
1514
+ "rstrip": false,
1515
+ "single_word": false,
1516
+ "special": true
1517
+ },
1518
+ "128189": {
1519
+ "content": "<|reserved_special_token_181|>",
1520
+ "lstrip": false,
1521
+ "normalized": false,
1522
+ "rstrip": false,
1523
+ "single_word": false,
1524
+ "special": true
1525
+ },
1526
+ "128190": {
1527
+ "content": "<|reserved_special_token_182|>",
1528
+ "lstrip": false,
1529
+ "normalized": false,
1530
+ "rstrip": false,
1531
+ "single_word": false,
1532
+ "special": true
1533
+ },
1534
+ "128191": {
1535
+ "content": "<|reserved_special_token_183|>",
1536
+ "lstrip": false,
1537
+ "normalized": false,
1538
+ "rstrip": false,
1539
+ "single_word": false,
1540
+ "special": true
1541
+ },
1542
+ "128192": {
1543
+ "content": "<|reserved_special_token_184|>",
1544
+ "lstrip": false,
1545
+ "normalized": false,
1546
+ "rstrip": false,
1547
+ "single_word": false,
1548
+ "special": true
1549
+ },
1550
+ "128193": {
1551
+ "content": "<|reserved_special_token_185|>",
1552
+ "lstrip": false,
1553
+ "normalized": false,
1554
+ "rstrip": false,
1555
+ "single_word": false,
1556
+ "special": true
1557
+ },
1558
+ "128194": {
1559
+ "content": "<|reserved_special_token_186|>",
1560
+ "lstrip": false,
1561
+ "normalized": false,
1562
+ "rstrip": false,
1563
+ "single_word": false,
1564
+ "special": true
1565
+ },
1566
+ "128195": {
1567
+ "content": "<|reserved_special_token_187|>",
1568
+ "lstrip": false,
1569
+ "normalized": false,
1570
+ "rstrip": false,
1571
+ "single_word": false,
1572
+ "special": true
1573
+ },
1574
+ "128196": {
1575
+ "content": "<|reserved_special_token_188|>",
1576
+ "lstrip": false,
1577
+ "normalized": false,
1578
+ "rstrip": false,
1579
+ "single_word": false,
1580
+ "special": true
1581
+ },
1582
+ "128197": {
1583
+ "content": "<|reserved_special_token_189|>",
1584
+ "lstrip": false,
1585
+ "normalized": false,
1586
+ "rstrip": false,
1587
+ "single_word": false,
1588
+ "special": true
1589
+ },
1590
+ "128198": {
1591
+ "content": "<|reserved_special_token_190|>",
1592
+ "lstrip": false,
1593
+ "normalized": false,
1594
+ "rstrip": false,
1595
+ "single_word": false,
1596
+ "special": true
1597
+ },
1598
+ "128199": {
1599
+ "content": "<|reserved_special_token_191|>",
1600
+ "lstrip": false,
1601
+ "normalized": false,
1602
+ "rstrip": false,
1603
+ "single_word": false,
1604
+ "special": true
1605
+ },
1606
+ "128200": {
1607
+ "content": "<|reserved_special_token_192|>",
1608
+ "lstrip": false,
1609
+ "normalized": false,
1610
+ "rstrip": false,
1611
+ "single_word": false,
1612
+ "special": true
1613
+ },
1614
+ "128201": {
1615
+ "content": "<|reserved_special_token_193|>",
1616
+ "lstrip": false,
1617
+ "normalized": false,
1618
+ "rstrip": false,
1619
+ "single_word": false,
1620
+ "special": true
1621
+ },
1622
+ "128202": {
1623
+ "content": "<|reserved_special_token_194|>",
1624
+ "lstrip": false,
1625
+ "normalized": false,
1626
+ "rstrip": false,
1627
+ "single_word": false,
1628
+ "special": true
1629
+ },
1630
+ "128203": {
1631
+ "content": "<|reserved_special_token_195|>",
1632
+ "lstrip": false,
1633
+ "normalized": false,
1634
+ "rstrip": false,
1635
+ "single_word": false,
1636
+ "special": true
1637
+ },
1638
+ "128204": {
1639
+ "content": "<|reserved_special_token_196|>",
1640
+ "lstrip": false,
1641
+ "normalized": false,
1642
+ "rstrip": false,
1643
+ "single_word": false,
1644
+ "special": true
1645
+ },
1646
+ "128205": {
1647
+ "content": "<|reserved_special_token_197|>",
1648
+ "lstrip": false,
1649
+ "normalized": false,
1650
+ "rstrip": false,
1651
+ "single_word": false,
1652
+ "special": true
1653
+ },
1654
+ "128206": {
1655
+ "content": "<|reserved_special_token_198|>",
1656
+ "lstrip": false,
1657
+ "normalized": false,
1658
+ "rstrip": false,
1659
+ "single_word": false,
1660
+ "special": true
1661
+ },
1662
+ "128207": {
1663
+ "content": "<|reserved_special_token_199|>",
1664
+ "lstrip": false,
1665
+ "normalized": false,
1666
+ "rstrip": false,
1667
+ "single_word": false,
1668
+ "special": true
1669
+ },
1670
+ "128208": {
1671
+ "content": "<|reserved_special_token_200|>",
1672
+ "lstrip": false,
1673
+ "normalized": false,
1674
+ "rstrip": false,
1675
+ "single_word": false,
1676
+ "special": true
1677
+ },
1678
+ "128209": {
1679
+ "content": "<|reserved_special_token_201|>",
1680
+ "lstrip": false,
1681
+ "normalized": false,
1682
+ "rstrip": false,
1683
+ "single_word": false,
1684
+ "special": true
1685
+ },
1686
+ "128210": {
1687
+ "content": "<|reserved_special_token_202|>",
1688
+ "lstrip": false,
1689
+ "normalized": false,
1690
+ "rstrip": false,
1691
+ "single_word": false,
1692
+ "special": true
1693
+ },
1694
+ "128211": {
1695
+ "content": "<|reserved_special_token_203|>",
1696
+ "lstrip": false,
1697
+ "normalized": false,
1698
+ "rstrip": false,
1699
+ "single_word": false,
1700
+ "special": true
1701
+ },
1702
+ "128212": {
1703
+ "content": "<|reserved_special_token_204|>",
1704
+ "lstrip": false,
1705
+ "normalized": false,
1706
+ "rstrip": false,
1707
+ "single_word": false,
1708
+ "special": true
1709
+ },
1710
+ "128213": {
1711
+ "content": "<|reserved_special_token_205|>",
1712
+ "lstrip": false,
1713
+ "normalized": false,
1714
+ "rstrip": false,
1715
+ "single_word": false,
1716
+ "special": true
1717
+ },
1718
+ "128214": {
1719
+ "content": "<|reserved_special_token_206|>",
1720
+ "lstrip": false,
1721
+ "normalized": false,
1722
+ "rstrip": false,
1723
+ "single_word": false,
1724
+ "special": true
1725
+ },
1726
+ "128215": {
1727
+ "content": "<|reserved_special_token_207|>",
1728
+ "lstrip": false,
1729
+ "normalized": false,
1730
+ "rstrip": false,
1731
+ "single_word": false,
1732
+ "special": true
1733
+ },
1734
+ "128216": {
1735
+ "content": "<|reserved_special_token_208|>",
1736
+ "lstrip": false,
1737
+ "normalized": false,
1738
+ "rstrip": false,
1739
+ "single_word": false,
1740
+ "special": true
1741
+ },
1742
+ "128217": {
1743
+ "content": "<|reserved_special_token_209|>",
1744
+ "lstrip": false,
1745
+ "normalized": false,
1746
+ "rstrip": false,
1747
+ "single_word": false,
1748
+ "special": true
1749
+ },
1750
+ "128218": {
1751
+ "content": "<|reserved_special_token_210|>",
1752
+ "lstrip": false,
1753
+ "normalized": false,
1754
+ "rstrip": false,
1755
+ "single_word": false,
1756
+ "special": true
1757
+ },
1758
+ "128219": {
1759
+ "content": "<|reserved_special_token_211|>",
1760
+ "lstrip": false,
1761
+ "normalized": false,
1762
+ "rstrip": false,
1763
+ "single_word": false,
1764
+ "special": true
1765
+ },
1766
+ "128220": {
1767
+ "content": "<|reserved_special_token_212|>",
1768
+ "lstrip": false,
1769
+ "normalized": false,
1770
+ "rstrip": false,
1771
+ "single_word": false,
1772
+ "special": true
1773
+ },
1774
+ "128221": {
1775
+ "content": "<|reserved_special_token_213|>",
1776
+ "lstrip": false,
1777
+ "normalized": false,
1778
+ "rstrip": false,
1779
+ "single_word": false,
1780
+ "special": true
1781
+ },
1782
+ "128222": {
1783
+ "content": "<|reserved_special_token_214|>",
1784
+ "lstrip": false,
1785
+ "normalized": false,
1786
+ "rstrip": false,
1787
+ "single_word": false,
1788
+ "special": true
1789
+ },
1790
+ "128223": {
1791
+ "content": "<|reserved_special_token_215|>",
1792
+ "lstrip": false,
1793
+ "normalized": false,
1794
+ "rstrip": false,
1795
+ "single_word": false,
1796
+ "special": true
1797
+ },
1798
+ "128224": {
1799
+ "content": "<|reserved_special_token_216|>",
1800
+ "lstrip": false,
1801
+ "normalized": false,
1802
+ "rstrip": false,
1803
+ "single_word": false,
1804
+ "special": true
1805
+ },
1806
+ "128225": {
1807
+ "content": "<|reserved_special_token_217|>",
1808
+ "lstrip": false,
1809
+ "normalized": false,
1810
+ "rstrip": false,
1811
+ "single_word": false,
1812
+ "special": true
1813
+ },
1814
+ "128226": {
1815
+ "content": "<|reserved_special_token_218|>",
1816
+ "lstrip": false,
1817
+ "normalized": false,
1818
+ "rstrip": false,
1819
+ "single_word": false,
1820
+ "special": true
1821
+ },
1822
+ "128227": {
1823
+ "content": "<|reserved_special_token_219|>",
1824
+ "lstrip": false,
1825
+ "normalized": false,
1826
+ "rstrip": false,
1827
+ "single_word": false,
1828
+ "special": true
1829
+ },
1830
+ "128228": {
1831
+ "content": "<|reserved_special_token_220|>",
1832
+ "lstrip": false,
1833
+ "normalized": false,
1834
+ "rstrip": false,
1835
+ "single_word": false,
1836
+ "special": true
1837
+ },
1838
+ "128229": {
1839
+ "content": "<|reserved_special_token_221|>",
1840
+ "lstrip": false,
1841
+ "normalized": false,
1842
+ "rstrip": false,
1843
+ "single_word": false,
1844
+ "special": true
1845
+ },
1846
+ "128230": {
1847
+ "content": "<|reserved_special_token_222|>",
1848
+ "lstrip": false,
1849
+ "normalized": false,
1850
+ "rstrip": false,
1851
+ "single_word": false,
1852
+ "special": true
1853
+ },
1854
+ "128231": {
1855
+ "content": "<|reserved_special_token_223|>",
1856
+ "lstrip": false,
1857
+ "normalized": false,
1858
+ "rstrip": false,
1859
+ "single_word": false,
1860
+ "special": true
1861
+ },
1862
+ "128232": {
1863
+ "content": "<|reserved_special_token_224|>",
1864
+ "lstrip": false,
1865
+ "normalized": false,
1866
+ "rstrip": false,
1867
+ "single_word": false,
1868
+ "special": true
1869
+ },
1870
+ "128233": {
1871
+ "content": "<|reserved_special_token_225|>",
1872
+ "lstrip": false,
1873
+ "normalized": false,
1874
+ "rstrip": false,
1875
+ "single_word": false,
1876
+ "special": true
1877
+ },
1878
+ "128234": {
1879
+ "content": "<|reserved_special_token_226|>",
1880
+ "lstrip": false,
1881
+ "normalized": false,
1882
+ "rstrip": false,
1883
+ "single_word": false,
1884
+ "special": true
1885
+ },
1886
+ "128235": {
1887
+ "content": "<|reserved_special_token_227|>",
1888
+ "lstrip": false,
1889
+ "normalized": false,
1890
+ "rstrip": false,
1891
+ "single_word": false,
1892
+ "special": true
1893
+ },
1894
+ "128236": {
1895
+ "content": "<|reserved_special_token_228|>",
1896
+ "lstrip": false,
1897
+ "normalized": false,
1898
+ "rstrip": false,
1899
+ "single_word": false,
1900
+ "special": true
1901
+ },
1902
+ "128237": {
1903
+ "content": "<|reserved_special_token_229|>",
1904
+ "lstrip": false,
1905
+ "normalized": false,
1906
+ "rstrip": false,
1907
+ "single_word": false,
1908
+ "special": true
1909
+ },
1910
+ "128238": {
1911
+ "content": "<|reserved_special_token_230|>",
1912
+ "lstrip": false,
1913
+ "normalized": false,
1914
+ "rstrip": false,
1915
+ "single_word": false,
1916
+ "special": true
1917
+ },
1918
+ "128239": {
1919
+ "content": "<|reserved_special_token_231|>",
1920
+ "lstrip": false,
1921
+ "normalized": false,
1922
+ "rstrip": false,
1923
+ "single_word": false,
1924
+ "special": true
1925
+ },
1926
+ "128240": {
1927
+ "content": "<|reserved_special_token_232|>",
1928
+ "lstrip": false,
1929
+ "normalized": false,
1930
+ "rstrip": false,
1931
+ "single_word": false,
1932
+ "special": true
1933
+ },
1934
+ "128241": {
1935
+ "content": "<|reserved_special_token_233|>",
1936
+ "lstrip": false,
1937
+ "normalized": false,
1938
+ "rstrip": false,
1939
+ "single_word": false,
1940
+ "special": true
1941
+ },
1942
+ "128242": {
1943
+ "content": "<|reserved_special_token_234|>",
1944
+ "lstrip": false,
1945
+ "normalized": false,
1946
+ "rstrip": false,
1947
+ "single_word": false,
1948
+ "special": true
1949
+ },
1950
+ "128243": {
1951
+ "content": "<|reserved_special_token_235|>",
1952
+ "lstrip": false,
1953
+ "normalized": false,
1954
+ "rstrip": false,
1955
+ "single_word": false,
1956
+ "special": true
1957
+ },
1958
+ "128244": {
1959
+ "content": "<|reserved_special_token_236|>",
1960
+ "lstrip": false,
1961
+ "normalized": false,
1962
+ "rstrip": false,
1963
+ "single_word": false,
1964
+ "special": true
1965
+ },
1966
+ "128245": {
1967
+ "content": "<|reserved_special_token_237|>",
1968
+ "lstrip": false,
1969
+ "normalized": false,
1970
+ "rstrip": false,
1971
+ "single_word": false,
1972
+ "special": true
1973
+ },
1974
+ "128246": {
1975
+ "content": "<|reserved_special_token_238|>",
1976
+ "lstrip": false,
1977
+ "normalized": false,
1978
+ "rstrip": false,
1979
+ "single_word": false,
1980
+ "special": true
1981
+ },
1982
+ "128247": {
1983
+ "content": "<|reserved_special_token_239|>",
1984
+ "lstrip": false,
1985
+ "normalized": false,
1986
+ "rstrip": false,
1987
+ "single_word": false,
1988
+ "special": true
1989
+ },
1990
+ "128248": {
1991
+ "content": "<|reserved_special_token_240|>",
1992
+ "lstrip": false,
1993
+ "normalized": false,
1994
+ "rstrip": false,
1995
+ "single_word": false,
1996
+ "special": true
1997
+ },
1998
+ "128249": {
1999
+ "content": "<|reserved_special_token_241|>",
2000
+ "lstrip": false,
2001
+ "normalized": false,
2002
+ "rstrip": false,
2003
+ "single_word": false,
2004
+ "special": true
2005
+ },
2006
+ "128250": {
2007
+ "content": "<|reserved_special_token_242|>",
2008
+ "lstrip": false,
2009
+ "normalized": false,
2010
+ "rstrip": false,
2011
+ "single_word": false,
2012
+ "special": true
2013
+ },
2014
+ "128251": {
2015
+ "content": "<|reserved_special_token_243|>",
2016
+ "lstrip": false,
2017
+ "normalized": false,
2018
+ "rstrip": false,
2019
+ "single_word": false,
2020
+ "special": true
2021
+ },
2022
+ "128252": {
2023
+ "content": "<|reserved_special_token_244|>",
2024
+ "lstrip": false,
2025
+ "normalized": false,
2026
+ "rstrip": false,
2027
+ "single_word": false,
2028
+ "special": true
2029
+ },
2030
+ "128253": {
2031
+ "content": "<|reserved_special_token_245|>",
2032
+ "lstrip": false,
2033
+ "normalized": false,
2034
+ "rstrip": false,
2035
+ "single_word": false,
2036
+ "special": true
2037
+ },
2038
+ "128254": {
2039
+ "content": "<|reserved_special_token_246|>",
2040
+ "lstrip": false,
2041
+ "normalized": false,
2042
+ "rstrip": false,
2043
+ "single_word": false,
2044
+ "special": true
2045
+ },
2046
+ "128255": {
2047
+ "content": "<|reserved_special_token_247|>",
2048
+ "lstrip": false,
2049
+ "normalized": false,
2050
+ "rstrip": false,
2051
+ "single_word": false,
2052
+ "special": true
2053
+ }
2054
+ },
2055
+ "bos_token": "<|begin▁of▁sentence|>",
2056
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
2057
+ "clean_up_tokenization_spaces": false,
2058
+ "eos_token": "<|end▁of▁sentence|>",
2059
+ "extra_special_tokens": {},
2060
+ "legacy": true,
2061
+ "model_max_length": 16384,
2062
+ "pad_token": "<|end▁of▁sentence|>",
2063
+ "sp_model_kwargs": {},
2064
+ "tokenizer_class": "LlamaTokenizerFast",
2065
+ "unk_token": null,
2066
+ "use_default_system_prompt": false
2067
+ }