| | import numpy as np |
| | import tensorflow as tf |
| | from tensorflow.keras.preprocessing.sequence import pad_sequences |
| | from tensorflow.keras.preprocessing.text import tokenizer_from_json |
| | import json |
| | import os |
| |
|
| | |
class Pipeline:
    """Single-sample text classifier built on a saved Keras model.

    All artifacts are read from the current working directory:

    * ``tokenizer.json`` -- tokenizer configuration, parsed with
      ``tokenizer_from_json`` (required).
    * ``.`` -- the model, loaded with ``tf.keras.models.load_model``
      (required).
    * ``label_map.json`` -- optional JSON object used to translate the
      predicted class index (looked up as a string key) into a label.
    """

    def __init__(self):
        """Load the tokenizer, the model and, if present, the label map."""
        with open("tokenizer.json", "r", encoding="utf-8") as f:
            tokenizer_json = f.read()
        self.tokenizer = tokenizer_from_json(tokenizer_json)
        # Every input is padded/truncated to this many tokens before inference.
        self.max_len = 150

        self.model = tf.keras.models.load_model(".")

        # The label map is optional; without it the raw class index is returned.
        self.label_map = None
        if os.path.exists("label_map.json"):
            with open("label_map.json", "r", encoding="utf-8") as f:
                self.label_map = json.load(f)

    def __call__(self, inputs):
        """Classify one sample.

        Args:
            inputs: Mapping with optional ``"text"`` and ``"image_desc"``
                string entries. A missing key or an explicit ``None`` value
                is treated as an empty string.

        Returns:
            ``{"label": ..., "score": ...}`` where ``label`` is the mapped
            label when a label map is loaded and contains the predicted
            index (otherwise the integer class index), and ``score`` is the
            highest value in the model output for this sample, as a float.
        """
        # `or ""` (rather than a .get default) also covers keys that are
        # present but set to None, which would otherwise break the
        # string concatenation below.
        text = inputs.get("text") or ""
        image_desc = inputs.get("image_desc") or ""
        input_text = text + " " + image_desc

        seq = self.tokenizer.texts_to_sequences([input_text])
        padded = pad_sequences(
            seq, maxlen=self.max_len, padding='post', truncating='post'
        )

        # verbose=0: do not print a progress bar for every one-row request.
        pred_probs = self.model.predict(padded, verbose=0)

        # The batch holds exactly one sample, so read everything from row 0.
        sample_probs = pred_probs[0]
        pred_label = int(np.argmax(sample_probs))
        score = float(np.max(sample_probs))

        if self.label_map:
            # JSON object keys are strings, hence the str() on the index;
            # fall back to the raw index when the map has no entry for it.
            label = self.label_map.get(str(pred_label), pred_label)
        else:
            label = pred_label
        return {"label": label, "score": score}
| |
|