def FunCineForgeTokenizer(init_param_path, **kwargs):
    """Load a pretrained tokenizer and register CineForge special tokens.

    Args:
        init_param_path: Path or model id forwarded to
            ``AutoTokenizer.from_pretrained``.
        **kwargs: Accepted for call-site compatibility; currently unused.

    Returns:
        The tokenizer with ``eos_token``/``pad_token`` set to
        ``<|endoftext|>`` and the additional special tokens registered.
    """
    # Imported lazily (as in the original) so importing this module does not
    # require transformers until the factory is actually called.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(init_param_path)

    # NOTE(review): the original list also contained four empty-string
    # entries ('' / ""), almost certainly non-ASCII tokens lost in an
    # encoding round-trip. Empty strings are meaningless as special tokens,
    # so they are dropped here — restore the real tokens from the original
    # source if they can be recovered.
    additional_tokens = [
        '<|im_start|>',
        '<|im_end|>',
        '<|startofclue|>',
        '<|endofclue|>',
        '<|endofprompt|>',
        '[breath]',
        '[noise]',
        '[laughter]',
        '[cough]',
        '[clucking]',
        '[accent]',
        '[quick_breath]',
        '[hissing]',
        '[sigh]',
        '[vocalized-noise]',
        '[lipsmack]',
        '[mn]',
        '<|endofsystem|>',
    ]
    special_tokens = {
        'eos_token': '<|endoftext|>',
        'pad_token': '<|endoftext|>',
        'additional_special_tokens': additional_tokens,
    }
    tokenizer.add_special_tokens(special_tokens)
    return tokenizer