{
  "add_prefix_space": false,
  "backend": "tokenizers",
  "bos_token": "",
  "clean_up_tokenization_spaces": false,
  "effective_vocab_size": 114822,
  "eos_token": "",
  "errors": "replace",
  "fix_mistral_regex": true,
  "is_local": false,
  "local_files_only": false,
  "model_max_length": 131072,
  "model_type": "byte_level_bpe",
  "no_audio_codec_tokens": true,
  "no_dense_timestamp_tokens": true,
  "open_formosa": {
    "required_special_token_count": 157,
    "required_special_tokens_present": true,
    "required_special_tokens_single_id": true,
    "standard_special_tokens": {
      "bos_token": "",
      "eos_token": "",
      "pad_token": "",
      "unk_token": ""
    }
  },
  "pad_token": "",
  "padding_side": "right",
  "rich_transcription": {
    "allow_non_speech_events": true,
    "compact_json": true,
    "default_format": "json_segments",
    "enabled": true,
    "include_content": true,
    "include_speaker": true,
    "include_start_end": true,
    "no_dense_timestamp_tokens": true,
    "timestamp_precision_digits": 2,
    "timestamp_unit": "seconds"
  },
  "special_tokens": [
    "<|pad|>",
    "<|bos|>",
    "<|eos|>",
    "<|unk|>",
    "<|system|>",
    "<|user_channel|>",
    "<|assistant_channel|>",
    "<|task:speech_to_text|>",
    "<|task:text_to_speech|>",
    "<|input_audio_start|>",
    "<|input_audio_end|>",
    "<|audio_ref_start|>",
    "<|audio_ref_end|>",
    "<|audio_start|>",
    "<|audio_end|>",
    "<|speech_start|>",
    "<|speech_end|>",
    "<|transcript_start|>",
    "<|transcript_end|>",
    "<|segment_start|>",
    "<|segment_end|>",
    "<|speaker|>",
    "<|start_time|>",
    "<|end_time|>",
    "<|duration|>",
    "<|content|>",
    "<|non_speech_event|>",
    "<|retrieval_result_start|>",
    "<|retrieval_result_end|>",
    "<|ocr_start|>",
    "<|ocr_end|>",
    "<|image_start|>",
    "<|image_end|>",
    "<|video_start|>",
    "<|video_end|>"
  ],
  "strict_no_dense_timestamp_tokens": true,
  "tokenizer_class": "GPT2Tokenizer",
  "truncation_side": "right",
  "unk_token": "",
  "vocab_size": 114688
}