from typing import Optional, Union import torch import transformers from transformers import ProcessorMixin try: from .asr_config import DEFAULT_ENCODER_CONV_LAYERS, ASRConfig except ImportError: from asr_config import DEFAULT_ENCODER_CONV_LAYERS, ASRConfig # type: ignore[no-redef] class ASRProcessor(ProcessorMixin): """Processor for Whisper-based ASR models.""" attributes = ["feature_extractor", "tokenizer"] feature_extractor_class = "AutoFeatureExtractor" tokenizer_class = "AutoTokenizer" AUDIO_TOKEN = "