{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "BertNormalizer", "clean_text": true, "handle_chinese_chars": true, "strip_accents": null, "lowercase": true }, "pre_tokenizer": { "type": "BertPreTokenizer" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 1 } } ], "special_tokens": { "[CLS]": { "id": "[CLS]", "ids": [ 2 ], "tokens": [ "[CLS]" ] }, "[SEP]": { "id": "[SEP]", "ids": [ 3 ], "tokens": [ "[SEP]" ] } } }, "decoder": { "type": "WordPiece", "prefix": "##", "cleanup": true }, "model": { "type": "WordPiece", "unk_token": "[UNK]", "continuing_subword_prefix": "##", "max_input_chars_per_word": 100, "vocab": { "[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "!": 5, "\"": 6, "#": 7, "$": 8, "%": 9, "&": 10, "'": 11, "(": 12, ")": 13, "*": 14, "+": 15, ",": 16, "-": 17, ".": 18, "/": 19, "0": 20, "1": 21, "2": 22, "3": 23, "4": 24, "5": 25, "6": 26, "7": 27, "8": 
28, "9": 29, ":": 30, ";": 31, "<": 32, "=": 33, ">": 34, "?": 35, "@": 36, "A": 37, "B": 38, "C": 39, "D": 40, "E": 41, "F": 42, "G": 43, "H": 44, "I": 45, "J": 46, "K": 47, "L": 48, "M": 49, "N": 50, "O": 51, "P": 52, "Q": 53, "R": 54, "S": 55, "T": 56, "U": 57, "V": 58, "W": 59, "X": 60, "Y": 61, "Z": 62, "[": 63, "\\": 64, "]": 65, "^": 66, "_": 67, "`": 68, "b": 70, "c": 71, "d": 72, "e": 73, "f": 74, "g": 75, "h": 76, "j": 78, "k": 79, "l": 80, "m": 81, "n": 82, "o": 83, "p": 84, "q": 85, "r": 86, "s": 87, "t": 88, "u": 89, "v": 90, "w": 91, "x": 92, "y": 93, "z": 94, "{": 95, "|": 96, "}": 97, "~": 98, "the": 99, "be": 100, "to": 101, "of": 102, "and": 103, "a": 104, "in": 105, "that": 106, "have": 107, "i": 108, "it": 109, "for": 110, "not": 111, "on": 112, "with": 113, "he": 114, "as": 115, "you": 116, "do": 117, "at": 118, "this": 119, "but": 120, "his": 121, "by": 122, "from": 123, "they": 124, "we": 125, "say": 126, "her": 127, "she": 128, "or": 129, "an": 130, "will": 131, "my": 132, "one": 133, "all": 134, "would": 135, "there": 136, "their": 137, "what": 138, "so": 139, "up": 140, "out": 141, "if": 142, "about": 143, "who": 144, "get": 145, "which": 146, "go": 147, "me": 148, "when": 149, "make": 150, "can": 151, "like": 152, "time": 153, "no": 154, "just": 155, "him": 156, "know": 157, "take": 158, "people": 159, "into": 160, "year": 161, "your": 162, "good": 163, "some": 164, "could": 165, "them": 166, "see": 167, "other": 168, "than": 169, "then": 170, "now": 171, "look": 172, "only": 173, "come": 174, "its": 175, "over": 176, "think": 177, "also": 178, "back": 179, "after": 180, "use": 181, "two": 182, "how": 183, "our": 184, "work": 185, "first": 186, "well": 187, "way": 188, "even": 189, "new": 190, "want": 191, "because": 192, "any": 193, "these": 194, "give": 195, "day": 196, "most": 197, "us": 198 } } }