| | import re |
| | from typing import Dict, List, Any |
| | from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline |
| | import torch |
| |
|
| |
|
class EndpointHandler:
    """Inference-endpoint handler that extracts a start date and an end date
    from free text using a fine-tuned DeBERTa extractive-QA model.

    The handler loads the model/tokenizer once at startup and answers the two
    fixed questions "What is the start date?" / "What is the end date?" per call.
    """

    def __init__(self, path: str = ""):
        """Load the fine-tuned QA model and tokenizer from ``path``.

        Args:
            path: Root directory of the deployed repository; the model is
                expected under ``<path>/deberta-qa-finetuned``.
        """
        self.date_model_path = path + "/deberta-qa-finetuned"
        self.date_tokenizer = AutoTokenizer.from_pretrained(self.date_model_path)
        self.date_model = AutoModelForQuestionAnswering.from_pretrained(self.date_model_path)

        # Keep the target device in one place; inputs are moved to the same
        # device in _extract_date (previously an ad-hoc .cuda() call).
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.date_model.to(self.device)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        """
        data args:
            inputs (:obj:`str`): free text possibly containing two dates.
        Return:
            A :obj:`dict` with keys ``"start_date"`` and ``"end_date"``;
            will be serialized and returned.
        """
        text = data["inputs"]
        start_date = self.remove_special_characters(self.extract_start_date(text))
        end_date = self.remove_special_characters(self.extract_end_date(text))
        return {"start_date": start_date, "end_date": end_date}

    def remove_special_characters(self, s: str) -> str:
        """Strip punctuation runs that are not sandwiched between digits.

        Separators inside a date such as ``08-08-2025`` or ``08/08/2025`` are
        preserved (each dash/slash touches a digit on both sides), while
        leading/trailing punctuation like a stray comma or period is removed.
        """
        return re.sub(r'(?<!\d)[^\w\s/]+|[^\w\s/]+(?!\d)', '', s).strip()

    def _extract_date(self, text: str, question: str) -> str:
        """Run extractive QA and return the decoded answer span.

        Shared implementation for extract_start_date/extract_end_date, which
        previously duplicated this code verbatim.
        """
        inputs = self.date_tokenizer(question, text, return_tensors="pt")
        # Match the model's device exactly instead of assuming bare .cuda().
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Inference only — no gradients needed.
        with torch.no_grad():
            outputs = self.date_model(**inputs)

        # Most likely answer span; end index is exclusive, hence the +1.
        # If the model predicts end <= start the slice is empty and the
        # decoded answer is "".
        answer_start = torch.argmax(outputs.start_logits)
        answer_end = torch.argmax(outputs.end_logits) + 1

        answer_tokens = inputs["input_ids"][0][answer_start:answer_end]
        return self.date_tokenizer.decode(answer_tokens, skip_special_tokens=True)

    def extract_start_date(self, text: str) -> str:
        """Return the start-date span found in ``text`` (may be empty)."""
        return self._extract_date(text, "What is the start date?")

    def extract_end_date(self, text: str) -> str:
        """Return the end-date span found in ``text`` (may be empty)."""
        return self._extract_date(text, "What is the end date?")
| |
|
| |
|
if __name__ == "__main__":
    # Smoke test: instantiate the handler against the local model directory
    # and run one sample payload. EndpointHandler is defined in this module,
    # so no self-import (`from handler import EndpointHandler`) is needed —
    # that import shadowed the in-file class and broke if the file was
    # renamed away from handler.py.
    my_handler = EndpointHandler(path=".")

    non_holiday_payload = {"inputs": "I am quite excited how this will turn out 08-08-2025 - 09-08-2025"}

    non_holiday_pred = my_handler(non_holiday_payload)

    print(non_holiday_pred)