from typing import Optional, Union import torch import transformers from transformers import ProcessorMixin try: from .asr_config import DEFAULT_ENCODER_CONV_LAYERS, ASRConfig, compute_encoder_output_length except ImportError: from asr_config import ( # type: ignore[no-redef] DEFAULT_ENCODER_CONV_LAYERS, ASRConfig, compute_encoder_output_length, ) class ASRProcessor(ProcessorMixin): """Processor for Whisper-based ASR models.""" attributes = ["feature_extractor", "tokenizer"] feature_extractor_class = "AutoFeatureExtractor" tokenizer_class = "AutoTokenizer" AUDIO_TOKEN = "