File size: 1,550 Bytes
4fa995b
2d836de
 
 
 
 
 
 
 
 
4fa995b
 
 
2d836de
4fa995b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d836de
4fa995b
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import os
import tempfile

# Redirect the Hugging Face / sentence-transformers caches to a directory
# under the system temp dir so they land somewhere writable. Kept ahead of
# the sentence_transformers import that follows (presumably so the library
# sees these paths when it loads).
HF_CACHE = os.path.join(tempfile.gettempdir(), "hf_cache")
os.environ.update(
    {
        "HF_HOME": HF_CACHE,
        "TRANSFORMERS_CACHE": HF_CACHE,
        "SENTENCE_TRANSFORMERS_HOME": HF_CACHE,
    }
)

from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pickle
import gdown

class Embedder:
    """Nearest-neighbour text search over a prebuilt FAISS index.

    Holds a sentence-transformers model used to embed queries, a FAISS
    index (``self.index``) and a metadata table (``self.metadata``) whose
    row positions are looked up with the labels returned by the index.
    Both stay ``None`` until one of the ``load_*`` methods is called.
    """

    def __init__(self, model_name='sentence-transformers/all-MiniLM-L6-v2'):
        """Load the embedding model named *model_name*; nothing else is loaded."""
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.metadata = None

    def download_file(self, url, out_path):
        """Download *url* to *out_path* with gdown unless the file already exists.

        Raises:
            RuntimeError: if gdown reports failure by returning ``None``.
        """
        if os.path.exists(out_path):
            return
        # Some gdown versions signal failure with a None return instead of an
        # exception; surface it here rather than as a confusing read error
        # later on.
        if gdown.download(url, out_path, quiet=False) is None:
            raise RuntimeError(f"Failed to download {url!r} to {out_path!r}")

    def load_from_files(self, index_path, metadata_path):
        """Read the FAISS index and the pickled metadata from local files."""
        self.index = faiss.read_index(index_path)
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only load metadata obtained from a source you trust.
        with open(metadata_path, "rb") as f:
            self.metadata = pickle.load(f)

    def load_from_drive(self, index_url, metadata_url,
                        index_path="faiss_index.idx",
                        metadata_path="metadata.pkl"):
        """Download the index and metadata (if not already present) and load them.

        *index_path* and *metadata_path* are the local destinations; the
        defaults keep the files in the current working directory.
        """
        self.download_file(index_url, index_path)
        self.download_file(metadata_url, metadata_path)
        self.load_from_files(index_path, metadata_path)

    def query(self, query_text, k=5):
        """Return up to *k* metadata rows nearest to *query_text*.

        The result is a copy of the matching metadata rows with an added
        ``score`` column holding the values returned by the index search
        (their meaning depends on how the index was built).
        NOTE(review): assumes ``self.metadata`` is a pandas DataFrame whose
        row order matches the index's vector order - confirm.

        Raises:
            ValueError: if the index or the metadata has not been loaded.
        """
        if self.index is None or self.metadata is None:
            raise ValueError("Index or metadata not loaded")

        query_embedding = np.asarray(
            self.model.encode([query_text]), dtype='float32'
        )
        scores, labels = self.index.search(query_embedding, k)
        # FAISS pads the result with label -1 when it finds fewer than k
        # neighbours; iloc[-1] would silently return the last metadata row,
        # so drop those placeholder slots together with their scores.
        found = labels[0] >= 0
        results = self.metadata.iloc[labels[0][found]].copy()
        results['score'] = scores[0][found]
        return results