| | from fastapi import FastAPI |
| | from pydantic import BaseModel |
| | import tensorflow as tf |
| | from transformers import AutoTokenizer, TFAutoModel |
| | from bs4 import BeautifulSoup |
| | import hazm |
| | import time |
| |
|
# ---------------------------------------------------------------------------
# Module-level resources, loaded once at startup.
# ---------------------------------------------------------------------------

# Persian BERT checkpoint used for sentence embeddings.
MODEL_NAME = "HooshvareLab/bert-base-parsbert-uncased"

# Hugging Face tokenizer + TensorFlow model (downloaded/cached on first run).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = TFAutoModel.from_pretrained(MODEL_NAME)

# hazm Persian text utilities.
# NOTE(review): sent_tokenizer is not used by the visible endpoint — kept in
# case other parts of the file (or future endpoints) rely on it.
sent_tokenizer = hazm.SentenceTokenizer()
normalizer = hazm.Normalizer()

# FastAPI application instance.
app = FastAPI()
| |
|
class Input(BaseModel):
    """Request body for /get_vectors.

    Attributes:
        texts: batch of raw (possibly HTML-containing) Persian strings to embed.
    """

    # list[str] (was bare `list`): lets pydantic reject non-string items with a
    # 422 validation error instead of crashing BeautifulSoup downstream (500).
    texts: list[str]
| |
|
| | @app.post("/get_vectors") |
| | def get_vecs(data: Input): |
| | now=time.time() |
| | texts=data.texts |
| | texts=list(map(lambda x: BeautifulSoup(x).get_text(), texts)) |
| | texts=list(map(normalizer.normalize, texts)) |
| | |
| | tokens=tokenizer(texts, return_tensors="tf", padding=True, truncation=True, max_length=512) |
| | outputs=model(**tokens) |
| | |
| | sentence_embedding=tf.reduce_mean(outputs.last_hidden_state, axis=1) |
| | vecs=sentence_embedding.numpy().tolist() |
| | |
| | return {"vectors": vecs, "duration": time.time()-now} |