Spaces:
Sleeping
Sleeping
File size: 1,550 Bytes
4fa995b 2d836de 4fa995b 2d836de 4fa995b 2d836de 4fa995b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | import os
import tempfile
# Redirect the Hugging Face / sentence-transformers caches into the system
# temp directory, which is writable. Kept ahead of the sentence_transformers
# import below — presumably so the library sees these values when it loads.
HF_CACHE = os.path.join(tempfile.gettempdir(), "hf_cache")
os.environ.update(
    HF_HOME=HF_CACHE,
    TRANSFORMERS_CACHE=HF_CACHE,
    SENTENCE_TRANSFORMERS_HOME=HF_CACHE,
)
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pickle
import gdown
class Embedder:
    """Text search over a prebuilt FAISS index using sentence embeddings.

    Holds three pieces of state: the embedding model, the FAISS index, and a
    metadata table whose rows are selected by the positions the index returns
    (accessed through ``.iloc``, so a pandas DataFrame is expected).
    """

    def __init__(self, model_name: str = 'sentence-transformers/all-MiniLM-L6-v2') -> None:
        """Load the embedding model; index and metadata start out unloaded.

        Args:
            model_name: Identifier passed to ``SentenceTransformer``.
        """
        self.model = SentenceTransformer(model_name)
        # Both remain None until load_from_files() or load_from_drive() runs.
        self.index = None
        self.metadata = None

    def download_file(self, url: str, out_path: str) -> None:
        """Download ``url`` to ``out_path`` with gdown unless the file exists.

        Args:
            url: Source URL handed to ``gdown.download``.
            out_path: Local destination path.

        Raises:
            RuntimeError: If ``gdown.download`` signals failure by returning
                ``None`` (some gdown versions do this instead of raising).
        """
        if os.path.exists(out_path):
            return  # A previous download is reused as-is.
        parent_dir = os.path.dirname(out_path)
        if parent_dir:
            # Allow destinations inside directories that do not exist yet.
            os.makedirs(parent_dir, exist_ok=True)
        if gdown.download(url, out_path, quiet=False) is None:
            raise RuntimeError(f"Failed to download {url!r} to {out_path!r}")

    def load_from_files(self, index_path: str, metadata_path: str) -> None:
        """Load the FAISS index and the pickled metadata from local files.

        Both objects are read before either attribute is assigned, so a
        failure while reading one does not leave the instance half-updated.

        Args:
            index_path: Path of the file read with ``faiss.read_index``.
            metadata_path: Path of the pickle file holding the metadata.
        """
        index = faiss.read_index(index_path)
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only load metadata files that come from a trusted source.
        with open(metadata_path, "rb") as f:
            metadata = pickle.load(f)
        self.index = index
        self.metadata = metadata

    def load_from_drive(
        self,
        index_url: str,
        metadata_url: str,
        index_path: str = "faiss_index.idx",
        metadata_path: str = "metadata.pkl",
    ) -> None:
        """Download the index and metadata (if not already present) and load them.

        Args:
            index_url: URL of the FAISS index file.
            metadata_url: URL of the pickled metadata file.
            index_path: Local path for the index file. Defaults to the
                historical ``"faiss_index.idx"`` in the working directory.
            metadata_path: Local path for the metadata file. Defaults to the
                historical ``"metadata.pkl"`` in the working directory.
        """
        self.download_file(index_url, index_path)
        self.download_file(metadata_url, metadata_path)
        self.load_from_files(index_path, metadata_path)

    def query(self, query_text: str, k: int = 5):
        """Return the metadata rows of the nearest neighbours of ``query_text``.

        Args:
            query_text: Text to embed and search for.
            k: Maximum number of neighbours to return; must be at least 1.

        Returns:
            A copy of the matching metadata rows, in the order the index
            returned them, with an added ``'score'`` column holding the
            values the index reported for each hit. Fewer than ``k`` rows are
            returned when the index has fewer than ``k`` results.

        Raises:
            ValueError: If the index or metadata is not loaded, or ``k < 1``.
        """
        if self.index is None or self.metadata is None:
            raise ValueError("Index or metadata not loaded")
        if k < 1:
            raise ValueError("k must be a positive integer")
        query_embedding = self.model.encode([query_text]).astype('float32')
        distances, labels = self.index.search(query_embedding, k)
        # FAISS pads missing results with label -1; without this filter
        # iloc[-1] would silently return the last metadata row as a hit.
        found = labels[0] >= 0
        results = self.metadata.iloc[labels[0][found]].copy()
        results['score'] = distances[0][found]
        return results
|