Spaces:
Runtime error
Runtime error
import torch
from transformers import AutoTokenizer, AutoModel

# Hugging Face Hub identifier of the checkpoint used for both the tokenizer
# and the encoder.
MODEL_NAME = "microsoft/unixcoder-base"

# Run on CUDA when torch reports it as available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load the encoder, move it to the chosen device and switch it to eval mode.
# Both .to() and .eval() return the module itself, so the calls can be chained.
unix_model = AutoModel.from_pretrained(MODEL_NAME).to(device).eval()
def get_unixcoder_embedding(code, max_length=512):
    """Embed source code as a flat first-token + mean-pooled vector.

    The input is tokenized (truncated to ``max_length`` tokens), run through
    the module-level ``unix_model`` without gradient tracking, and two
    summaries of the last hidden state are concatenated per input: the hidden
    state at position 0 and the mean over token positions.

    Args:
        code: Text handed straight to the module-level ``tokenizer``.
            Presumably a single source-code string; a list of strings is
            also accepted by the tokenizer call as written.
        max_length: Maximum number of tokens kept after truncation.

    Returns:
        A 1-D NumPy array. Each input contributes ``2 * hidden_size`` values
        (first-token vector followed by the mean vector). NOTE: because the
        result is flattened, a batch of several inputs comes back as one
        concatenated vector, not one row per input.
    """
    inputs = tokenizer(
        code,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = unix_model(**inputs)

    last_hidden = outputs.last_hidden_state
    cls_embedding = last_hidden[:, 0, :]

    attention_mask = inputs.get("attention_mask")
    if attention_mask is None or bool(attention_mask.all()):
        # Nothing is padded (or no mask was produced): the plain mean is
        # already correct, and keeping it leaves results for unpadded input
        # exactly as before.
        mean_embedding = last_hidden.mean(dim=1)
    else:
        # padding=True pads shorter sequences in a batch; averaging over
        # those positions would dilute the embedding with pad-token states,
        # so only positions where the mask is 1 are averaged.
        mask = attention_mask.unsqueeze(-1).to(last_hidden.dtype)
        token_counts = mask.sum(dim=1).clamp(min=1)  # guard against /0
        mean_embedding = (last_hidden * mask).sum(dim=1) / token_counts

    combined = torch.cat((cls_embedding, mean_embedding), dim=1)
    return combined.cpu().numpy().flatten()