from sentence_transformers import SentenceTransformer


class EndpointHandler:
    """Inference-endpoint handler that turns request text into embeddings.

    The model is loaded once in ``__init__`` and reused for every request
    handled by ``__call__``.
    """

    def __init__(self, path=""):
        """Load the embedding model.

        Args:
            path: Accepted so the constructor matches the handler calling
                convention, but intentionally unused: the model is always
                loaded by its hub identifier with ``trust_remote_code=True``
                (the override described in the comment below).
        """
        # Here is the magic override that bypasses the Hugging Face bug
        print("Initializing Nemotron 8B with trust_remote_code=True...")
        self.model = SentenceTransformer(
            "nvidia/llama-embed-nemotron-8b",
            trust_remote_code=True,
            model_kwargs={"attn_implementation": "eager"},
        )
        print("Model loaded successfully!")

    def __call__(self, data):
        """Embed the text carried by one API request.

        Args:
            data: The request payload. Normally a dict whose ``"inputs"``
                entry holds the text (or list of texts) to embed. A non-dict
                payload is treated as the inputs themselves.

        Returns:
            The embeddings converted with ``.tolist()`` so the response is
            made of plain Python lists.

        Raises:
            ValueError: If a dict payload has no ``"inputs"`` entry, or the
                inputs are ``None``.
        """
        if isinstance(data, dict):
            # Read without pop(): the caller's request dict is left intact.
            if "inputs" not in data:
                # Falling back to the whole payload dict would hand the
                # encoder something that is not text, so fail loudly instead.
                raise ValueError('Request payload is missing the "inputs" field.')
            inputs = data["inputs"]
        else:
            # Raw payload (e.g. a bare string or list of strings).
            inputs = data

        if inputs is None:
            raise ValueError('"inputs" must not be None.')

        # Generate the embedding vectors.
        embeddings = self.model.encode(inputs)

        # Return plain Python lists so the client can deserialize the result.
        return embeddings.tolist()