rohannsinghal's picture
Update handler.py
98b9cc8 verified
raw
history blame contribute delete
879 Bytes
from sentence_transformers import SentenceTransformer
class EndpointHandler:
    """Callable request handler that turns text into embedding vectors.

    The embedding model is loaded once in ``__init__`` and reused by every
    subsequent ``__call__``.
    """

    def __init__(self, path=""):
        """Load the Nemotron 8B embedding model.

        Args:
            path: Accepted but not used by this handler; the model is always
                loaded by its hard-coded hub id. (Presumably the serving
                runtime passes the local repository directory here --
                confirm against the deployment environment.)
        """
        # The original author notes that passing trust_remote_code=True
        # explicitly works around a Hugging Face loading bug; the attention
        # implementation is pinned to "eager" via model_kwargs.
        print("Initializing Nemotron 8B with trust_remote_code=True...")
        self.model = SentenceTransformer(
            "nvidia/llama-embed-nemotron-8b",
            trust_remote_code=True,
            model_kwargs={"attn_implementation": "eager"},
        )
        print("Model loaded successfully!")

    def __call__(self, data):
        """Embed the text carried by one request.

        Args:
            data: The request payload. When it is a dict, the value under
                ``"inputs"`` is encoded; if that key is absent the whole dict
                is handed to the encoder, as before. A non-dict payload
                (e.g. a bare string or list of strings) is encoded directly.

        Returns:
            The result of ``self.model.encode(...)`` converted with
            ``.tolist()`` so it is made of plain Python lists/numbers.
        """
        if isinstance(data, dict):
            # Use .get rather than .pop so the caller's payload is left
            # intact (it may be logged, retried, or reused afterwards).
            inputs = data.get("inputs", data)
        else:
            inputs = data

        embeddings = self.model.encode(inputs)
        return embeddings.tolist()