PangolinTokenizer / evaluation_report.json
voidful's picture
Add Open Formosa special tokens
989d8ec verified
{
"ok": true,
"tokenizer": "voidful/PangolinTokenizer",
"base_vocab_size": 114688,
"effective_vocab_size": 114822,
"required_special_token_count": 157,
"required_special_tokens_missing": [],
"required_special_tokens_single_id": true,
"standard_special_tokens": {
"bos_token": "<s>",
"eos_token": "</s>",
"unk_token": "<unk>",
"pad_token": "<pad>"
},
"banned_token_matches": {
"audio_numeric_tokens": [],
"dense_timestamp_tokens": []
},
"smoke_texts": {
"traditional_chinese": "台灣本土語言模型需要保留繁體中文、注音ㄅㄆㄇ與台語漢字。",
"duplex": "<|task:full_duplex_speech|><|audio_ref_start|>audio://utterance_000001<|audio_ref_end|>",
"tool_call": "<|tool_call_start|>{\"name\":\"search_documents\",\"arguments\":{\"query\":\"健保年金\"}}<|tool_call_end|>"
}
}