def FunCineForgeTokenizer(init_param_path, **kwargs):
    """Load a pretrained tokenizer and register the FunCineForge special tokens.

    Parameters
    ----------
    init_param_path : str
        Local path or hub id forwarded to ``AutoTokenizer.from_pretrained``.
    **kwargs
        Extra keyword arguments forwarded to ``AutoTokenizer.from_pretrained``
        (e.g. ``trust_remote_code``, ``revision``).  The original version
        accepted these but silently dropped them.

    Returns
    -------
    transformers.PreTrainedTokenizerBase
        The tokenizer with ``eos_token``/``pad_token`` set to
        ``<|endoftext|>`` and the chat-markup / paralinguistic event tokens
        registered as additional special tokens.

    Notes
    -----
    Adding tokens grows the vocabulary: any model paired with this tokenizer
    must have its token embeddings resized by the caller afterwards.
    """
    # Deferred import keeps `transformers` optional until this factory is used.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(init_param_path, **kwargs)

    # NOTE(review): the original list contained four empty-string entries,
    # apparently tokens lost to an encoding/mojibake issue -- confirm what
    # they were meant to be.  Empty strings are omitted here because
    # registering '' as a special token is invalid/no-op.
    additional_tokens = [
        '<|im_start|>', '<|im_end|>',
        '<|startofclue|>', '<|endofclue|>', '<|endofprompt|>',
        '[breath]', '[noise]',
        '[laughter]', '[cough]', '[clucking]', '[accent]',
        '[quick_breath]',
        '[hissing]', '[sigh]', '[vocalized-noise]',
        '[lipsmack]', '[mn]', '<|endofsystem|>',
    ]
    special_tokens = {
        # eos and pad deliberately share the same token.
        'eos_token': '<|endoftext|>',
        'pad_token': '<|endoftext|>',
        'additional_special_tokens': additional_tokens,
    }
    tokenizer.add_special_tokens(special_tokens)
    return tokenizer