checkpoints / tokenizer.json
lvwerra's picture
lvwerra HF Staff
lvwerra/atomiclm-dev
6d5862e verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 98,
"content": "<|pad|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 99,
"content": "<|unk|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 100,
"content": "<|begin_of_text|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 101,
"content": "<|end_of_text|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 102,
"content": "<|im_start|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 103,
"content": "<|im_end|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 104,
"content": "<|system|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 105,
"content": "<|user|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 106,
"content": "<|assistant|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 107,
"content": "<|tool_call|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 108,
"content": "<|tool_response|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 109,
"content": "<think>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 110,
"content": "</think>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 111,
"content": "<|reserved_0|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 112,
"content": "<|reserved_1|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 113,
"content": "<|reserved_2|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 114,
"content": "<|reserved_3|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 115,
"content": "<|reserved_4|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 116,
"content": "<|reserved_5|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 117,
"content": "<|reserved_6|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 118,
"content": "<|reserved_7|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 119,
"content": "<|reserved_8|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 120,
"content": "<|reserved_9|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 121,
"content": "<|reserved_10|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 122,
"content": "<|reserved_11|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 123,
"content": "<|reserved_12|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 124,
"content": "<|reserved_13|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 125,
"content": "<|reserved_14|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 126,
"content": "<|reserved_15|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 127,
"content": "<|reserved_16|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Sequence",
"normalizers": [
{
"type": "Replace",
"pattern": {
"Regex": "\\r\\n"
},
"content": "\n"
},
{
"type": "Replace",
"pattern": {
"Regex": "\\r"
},
"content": "\n"
},
{
"type": "Replace",
"pattern": {
"String": " "
},
"content": " "
}
]
},
"pre_tokenizer": {
"type": "Split",
"pattern": {
"Regex": "[\\s\\S]"
},
"behavior": "Isolated",
"invert": false
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {}
},
"decoder": {
"type": "Fuse"
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<|unk|>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
" ": 0,
"!": 1,
"\"": 2,
"#": 3,
"$": 4,
"%": 5,
"&": 6,
"'": 7,
"(": 8,
")": 9,
"*": 10,
"+": 11,
",": 12,
"-": 13,
".": 14,
"/": 15,
"0": 16,
"1": 17,
"2": 18,
"3": 19,
"4": 20,
"5": 21,
"6": 22,
"7": 23,
"8": 24,
"9": 25,
":": 26,
";": 27,
"<": 28,
"=": 29,
">": 30,
"?": 31,
"@": 32,
"A": 33,
"B": 34,
"C": 35,
"D": 36,
"E": 37,
"F": 38,
"G": 39,
"H": 40,
"I": 41,
"J": 42,
"K": 43,
"L": 44,
"M": 45,
"N": 46,
"O": 47,
"P": 48,
"Q": 49,
"R": 50,
"S": 51,
"T": 52,
"U": 53,
"V": 54,
"W": 55,
"X": 56,
"Y": 57,
"Z": 58,
"[": 59,
"\\": 60,
"]": 61,
"^": 62,
"_": 63,
"`": 64,
"a": 65,
"b": 66,
"c": 67,
"d": 68,
"e": 69,
"f": 70,
"g": 71,
"h": 72,
"i": 73,
"j": 74,
"k": 75,
"l": 76,
"m": 77,
"n": 78,
"o": 79,
"p": 80,
"q": 81,
"r": 82,
"s": 83,
"t": 84,
"u": 85,
"v": 86,
"w": 87,
"x": 88,
"y": 89,
"z": 90,
"{": 91,
"|": 92,
"}": 93,
"~": 94,
"\n": 95,
"\t": 96,
"\r": 97,
"<|pad|>": 98,
"<|unk|>": 99,
"<|begin_of_text|>": 100,
"<|end_of_text|>": 101,
"<|im_start|>": 102,
"<|im_end|>": 103,
"<|system|>": 104,
"<|user|>": 105,
"<|assistant|>": 106,
"<|tool_call|>": 107,
"<|tool_response|>": 108,
"<think>": 109,
"</think>": 110,
"<|reserved_0|>": 111,
"<|reserved_1|>": 112,
"<|reserved_2|>": 113,
"<|reserved_3|>": 114,
"<|reserved_4|>": 115,
"<|reserved_5|>": 116,
"<|reserved_6|>": 117,
"<|reserved_7|>": 118,
"<|reserved_8|>": 119,
"<|reserved_9|>": 120,
"<|reserved_10|>": 121,
"<|reserved_11|>": 122,
"<|reserved_12|>": 123,
"<|reserved_13|>": 124,
"<|reserved_14|>": 125,
"<|reserved_15|>": 126,
"<|reserved_16|>": 127
},
"merges": []
}
}