harryrobert
/

latex-ocr

Model card Files Files and versions

latex-ocr / processing_latex_ocr.py

harryrobert's picture

Upload folder using huggingface_hub

3372a56 verified 2 days ago

history blame contribute delete

1.23 kB

	from transformers import ProcessorMixin
	from image_processing_latex_ocr import LaTeXOCRImageProcessor
	from tokenization_latex_ocr import LaTeXTokenizer

	class LaTeXOCRProcessor(ProcessorMixin):
	    """Combine an image processor and a tokenizer behind a single object.

	    Calling the processor runs the image processor on ``images`` and/or the
	    tokenizer on ``text`` and merges both results into one dictionary.
	    ``decode`` and ``batch_decode`` delegate to the tokenizer.
	    """

	    # Class-level configuration read by ``ProcessorMixin``.
	    attributes = ["image_processor", "tokenizer"]
	    image_processor_class = "AutoImageProcessor"
	    tokenizer_class = "AutoTokenizer"

	    def __init__(self, image_processor, tokenizer):
	        """Hand both components to ``ProcessorMixin.__init__``."""
	        super().__init__(image_processor, tokenizer)

	    def __call__(self, images=None, text=None, return_tensors=None, **kwargs):
	        """Preprocess images and/or text and return the merged outputs.

	        Args:
	            images: Input forwarded to ``self.image_processor``; skipped
	                when ``None``.
	            text: Input forwarded to ``self.tokenizer``; skipped when
	                ``None``.
	            return_tensors: Passed through unchanged to whichever
	                component is invoked.
	            **kwargs: Extra keyword arguments forwarded to every component
	                that is invoked.

	        Returns:
	            A plain ``dict`` holding the image processor's entries followed
	            by the tokenizer's entries.

	        Raises:
	            ValueError: If both ``images`` and ``text`` are ``None``.
	        """
	        if images is None and text is None:
	            raise ValueError("You must specify either images or text.")

	        # NOTE(review): the same **kwargs are sent to both components, so an
	        # option only one of them understands may be rejected by the other
	        # when images and text are supplied together - confirm with callers.
	        output = {}
	        if images is not None:
	            output.update(
	                self.image_processor(images, return_tensors=return_tensors, **kwargs)
	            )

	        # Tokenizer entries are merged last, so they win on a key collision.
	        if text is not None:
	            output.update(
	                self.tokenizer(text, return_tensors=return_tensors, **kwargs)
	            )

	        return output

	    def batch_decode(self, *args, **kwargs):
	        """Forward all arguments to ``self.tokenizer.batch_decode``.

	        Keyword arguments are passed through as well, so calls such as
	        ``batch_decode(ids, skip_special_tokens=True)`` reach the tokenizer
	        instead of failing at this wrapper.
	        """
	        return self.tokenizer.batch_decode(*args, **kwargs)

	    def decode(self, *args, **kwargs):
	        """Forward all arguments to ``self.tokenizer.decode``.

	        Keyword arguments are passed through as well, so calls such as
	        ``decode(ids, skip_special_tokens=True)`` reach the tokenizer
	        instead of failing at this wrapper.
	        """
	        return self.tokenizer.decode(*args, **kwargs)