File size: 1,861 Bytes
3a31377 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | {
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "e7caa8ad",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4, 320)\n",
"tensor([[ 1.0000, 0.1580, -0.4305, -0.5529],\n",
" [ 0.1580, 1.0000, 0.6916, 0.6522],\n",
" [-0.4305, 0.6916, 1.0000, 0.9836],\n",
" [-0.5529, 0.6522, 0.9836, 1.0000]])\n"
]
}
],
"source": [
"from sentence_transformers import SentenceTransformer\n",
"\n",
"# Download the embedding model from the 🤗 Hub\n",
"model = SentenceTransformer(\"gbyuvd/miniChembed-prototype\")\n",
"\n",
"# Molecules to embed, one SMILES string per entry\n",
"sentences = [\n",
"    'O=C1/C=C\\\\C=C2/N1C[C@@H]3CNC[C@H]2C3',  # Cytisine\n",
"    \"n1c2cc3c(cc2ncc1)[C@@H]4CNC[C@H]3C4\",  # Varenicline\n",
"    \"c1ncccc1[C@@H]2CCCN2C\",  # Nicotine\n",
"    'Nc1nc2cncc-2co1',  # CID: 162789184\n",
"]\n",
"\n",
"# Run inference: one embedding per input string\n",
"embeddings = model.encode(sentences)\n",
"print(embeddings.shape)\n",
"# Expected (matches the output saved with this cell):\n",
"# (4, 320)\n",
"\n",
"# Score every embedding against every other one (4 x 4 matrix)\n",
"similarities = model.similarity(embeddings, embeddings)\n",
"print(similarities)\n",
"# Expected (matches the output saved with this cell):\n",
"# tensor([[ 1.0000,  0.1580, -0.4305, -0.5529],\n",
"#         [ 0.1580,  1.0000,  0.6916,  0.6522],\n",
"#         [-0.4305,  0.6916,  1.0000,  0.9836],\n",
"#         [-0.5529,  0.6522,  0.9836,  1.0000]])\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|