Spaces:
Running on Zero
Running on Zero
File size: 795 Bytes
def FunCineForgeTokenizer(init_param_path, **kwargs):
    """Load a pretrained tokenizer and register the special tokens on it.

    Args:
        init_param_path: Forwarded unchanged to
            ``AutoTokenizer.from_pretrained``.
        **kwargs: Accepted but not read anywhere in this function.

    Returns:
        The object produced by ``AutoTokenizer.from_pretrained``, after
        ``add_special_tokens`` has been called on it with the mapping
        built below.
    """
    # Imported inside the function body, so `transformers` is only
    # required once the function is actually called.
    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained(init_param_path)

    # The same literal is used for both the EOS and the padding token.
    end_of_text = '<|endoftext|>'

    # NOTE(review): list order presumably influences the ids assigned to
    # newly added tokens -- keep it stable; confirm against checkpoints.
    extra_markers = [
        '<|im_start|>',
        '<|im_end|>',
        '<|startofclue|>',
        '<|endofclue|>',
        '<|endofprompt|>',
        '[breath]',
        '<strong>',
        '</strong>',
        '[noise]',
        '[laughter]',
        '[cough]',
        '[clucking]',
        '[accent]',
        '[quick_breath]',
        '<laughter>',
        '</laughter>',
        '[hissing]',
        '[sigh]',
        '[vocalized-noise]',
        '[lipsmack]',
        '[mn]',
        '<|endofsystem|>',
    ]

    overrides = {'eos_token': end_of_text, 'pad_token': end_of_text}
    overrides['additional_special_tokens'] = extra_markers

    tok.add_special_tokens(overrides)
    return tok