NeuraCraft
/

MiniWhisper-ASR

Automatic Speech Recognition

text2text-generation

Model card Files Files and versions

MiniWhisper-ASR / processing_mini_whisper.py

NeuraCraft's picture

Update Main Files

f8a9c31 verified 8 days ago

history blame contribute delete

1.5 kB

	# processing_mini_whisper.py
	from ...processing_utils import ProcessorMixin
	from ...utils import auto_docstring


	# Bundles a feature extractor (audio side) and a tokenizer (text side) behind
	# a single callable object.
	# NOTE(review): documentation on the decorated members is kept in `#` comments
	# because `@auto_docstring` presumably generates their docstrings -- confirm
	# before adding hand-written docstrings there.
	@auto_docstring
	class MiniWhisperProcessor(ProcessorMixin):
	    def __init__(self, feature_extractor, tokenizer):
	        # Both components are handed to ProcessorMixin unchanged; the methods
	        # below read them back as `self.feature_extractor` / `self.tokenizer`.
	        super().__init__(feature_extractor, tokenizer)

	    def get_decoder_prompt_ids(self, task=None, language=None, no_timestamps=True):
	        """Delegate to `self.tokenizer.get_decoder_prompt_ids` with the same arguments."""
	        return self.tokenizer.get_decoder_prompt_ids(
	            task=task, language=language, no_timestamps=no_timestamps
	        )

	    def get_prompt_ids(self, text: str, return_tensors="np"):
	        """Delegate to `self.tokenizer.get_prompt_ids` for `text`."""
	        return self.tokenizer.get_prompt_ids(text, return_tensors=return_tensors)

	    # Process audio and/or text in one call.
	    #
	    # Inputs:
	    #   audio          first positional argument, or the `audio` keyword; when
	    #                  both are given the positional value wins.
	    #   sampling_rate  keyword only; forwarded to the feature extractor.
	    #   text           keyword only; forwarded to the tokenizer.
	    #   Remaining positionals go to the feature extractor; remaining keywords
	    #   go to both the feature extractor and the tokenizer.
	    #
	    # Returns:
	    #   audio only -> the feature extractor output
	    #   text only  -> the tokenizer output
	    #   both       -> the feature extractor output with "labels" set to the
	    #                 tokenizer output's "input_ids"
	    #
	    # Raises:
	    #   ValueError when neither `audio` nor `text` is supplied.
	    @auto_docstring
	    def __call__(self, *args, **kwargs):
	        # The signature must be variadic (`*args, **kwargs`): the body pops
	        # keys from `kwargs` as a dict and slices `args` as a tuple.
	        audio = kwargs.pop("audio", None)
	        sampling_rate = kwargs.pop("sampling_rate", None)
	        text = kwargs.pop("text", None)
	        if len(args) > 0:
	            # A leading positional argument is treated as the audio input.
	            audio = args[0]
	            args = args[1:]

	        if audio is None and text is None:
	            raise ValueError("You need to specify either an `audio` or `text` input to process.")

	        if audio is not None:
	            # Unpack the leftovers so they arrive as individual arguments
	            # rather than as one tuple / a positional splat of keywords.
	            inputs = self.feature_extractor(audio, *args, sampling_rate=sampling_rate, **kwargs)
	        if text is not None:
	            encodings = self.tokenizer(text, **kwargs)

	        if text is None:
	            return inputs
	        if audio is None:
	            return encodings

	        # Both modalities present: attach the token ids as labels.
	        inputs["labels"] = encodings["input_ids"]
	        return inputs


	# Names exported by `from <module> import *`; the processor class is the only entry.
	__all__ = ["MiniWhisperProcessor"]