Nidhi-Phophaliya committed on
Commit
4fa995b
·
verified ·
1 Parent(s): c765e15

Update app/utils/embedder.py

Browse files
Files changed (1) hide show
  1. app/utils/embedder.py +42 -38
app/utils/embedder.py CHANGED
@@ -1,38 +1,42 @@
1
- # app/utils/embedder.py
2
-
3
- import gdown
4
- import os
5
- import faiss
6
- import numpy as np
7
- import pickle
8
- from sentence_transformers import SentenceTransformer
9
-
10
- class Embedder:
11
- def __init__(self, model_name='paraphrase-MiniLM-L3-v2'):
12
- self.model = SentenceTransformer(model_name)
13
- self.index = None
14
- self.metadata = None
15
-
16
- def download_file(self, url, out_path):
17
- if not os.path.exists(out_path):
18
- gdown.download(url, out_path, quiet=False)
19
-
20
- def load_from_files(self, index_path, metadata_path):
21
- self.index = faiss.read_index(index_path)
22
- with open(metadata_path, "rb") as f:
23
- self.metadata = pickle.load(f)
24
-
25
- def load_from_drive(self, index_url, metadata_url):
26
- self.download_file(index_url, "faiss_index.idx")
27
- self.download_file(metadata_url, "metadata.pkl")
28
- self.load_from_files("faiss_index.idx", "metadata.pkl")
29
-
30
- def query(self, query_text, k=5):
31
- if self.index is None or self.metadata is None:
32
- raise ValueError("Index or metadata not loaded")
33
-
34
- query_embedding = self.model.encode([query_text]).astype('float32')
35
- D, I = self.index.search(query_embedding, k)
36
- results = self.metadata.iloc[I[0]].copy()
37
- results['score'] = D[0]
38
- return results
 
 
 
 
 
1
+ # app/utils/embedder.py
2
+
3
+ import gdown
4
+ import os
5
+ import faiss
6
+ import numpy as np
7
+ import pickle
8
+ from sentence_transformers import SentenceTransformer
9
+
10
class Embedder:
    """Sentence-embedding search over a prebuilt FAISS index.

    Holds a SentenceTransformer model, a FAISS index and a metadata table.
    The index and metadata start out as ``None`` and must be loaded with
    ``load_from_files`` or ``load_from_drive`` before ``query`` is called.
    """

    def __init__(self, model_name='sentence-transformers/all-MiniLM-L6-v2'):
        """Create the embedder and load the sentence-transformer model.

        Args:
            model_name: Model identifier handed to ``SentenceTransformer``.
        """
        os.environ['TRANSFORMERS_CACHE'] = './cache'
        os.environ['HF_HOME'] = './cache'
        os.makedirs('./cache', exist_ok=True)

        # NOTE(review): the Hugging Face libraries appear to read the cache
        # environment variables when they are first imported, which has
        # already happened by the time this constructor runs. Passing the
        # directory explicitly makes the local cache take effect regardless.
        self.model = SentenceTransformer(model_name, cache_folder='./cache')
        self.index = None      # FAISS index, set by load_from_files
        self.metadata = None   # metadata table, set by load_from_files

    def download_file(self, url, out_path):
        """Download ``url`` to ``out_path`` unless that path already exists.

        Args:
            url: Source URL passed to ``gdown.download``.
            out_path: Local destination path; an existing file is reused.
        """
        if not os.path.exists(out_path):
            gdown.download(url, out_path, quiet=False)

    def load_from_files(self, index_path, metadata_path):
        """Load the FAISS index and the pickled metadata from local files.

        Args:
            index_path: Path of the index file read by ``faiss.read_index``.
            metadata_path: Path of the pickle file holding the metadata.
        """
        self.index = faiss.read_index(index_path)
        with open(metadata_path, "rb") as f:
            # SECURITY: unpickling executes arbitrary code embedded in the
            # file. Only load metadata that comes from a trusted source.
            self.metadata = pickle.load(f)

    def load_from_drive(self, index_url, metadata_url):
        """Fetch the index and metadata by URL, then load them.

        The files are stored as ``faiss_index.idx`` and ``metadata.pkl`` in
        the current working directory; copies already present are reused.

        Args:
            index_url: URL of the FAISS index file.
            metadata_url: URL of the pickled metadata file.
        """
        self.download_file(index_url, "faiss_index.idx")
        self.download_file(metadata_url, "metadata.pkl")
        self.load_from_files("faiss_index.idx", "metadata.pkl")

    def query(self, query_text, k=5):
        """Return the metadata rows of the nearest neighbours of a query.

        Args:
            query_text: Text to embed and search for.
            k: Maximum number of neighbours to request from the index.

        Returns:
            A copy of the matching metadata rows with an added ``score``
            column holding the values returned by the index search. Fewer
            than ``k`` rows are returned when the index reports fewer hits.
            (Assumes the metadata is a pandas DataFrame, since it is
            accessed through ``.iloc`` - confirm against the pickle.)

        Raises:
            ValueError: If the index or the metadata has not been loaded.
        """
        if self.index is None or self.metadata is None:
            raise ValueError("Index or metadata not loaded")

        query_embedding = self.model.encode([query_text]).astype('float32')
        D, I = self.index.search(query_embedding, k)

        # FAISS pads the label array with -1 when it finds fewer than k
        # neighbours. Passing -1 to iloc would silently select the LAST
        # metadata row, so drop those padding slots before the lookup.
        found = I[0] >= 0
        results = self.metadata.iloc[I[0][found]].copy()
        results['score'] = D[0][found]
        return results