# trkn-hackrx/app/utils/embedder.py
# Nidhi-Phophaliya's picture
# Update app/utils/embedder.py
# 2d836de verified
import os
import tempfile
# Force the Hugging Face cache paths to a writable location: a "hf_cache"
# directory under the system temp dir. All three environment variables point
# at the same directory.
# NOTE(review): these assignments sit before the `sentence_transformers`
# import below, presumably so the library sees them when it is imported --
# keep this ordering unless confirmed otherwise.
HF_CACHE = os.path.join(tempfile.gettempdir(), "hf_cache")
os.environ["HF_HOME"] = HF_CACHE
os.environ["TRANSFORMERS_CACHE"] = HF_CACHE
os.environ["SENTENCE_TRANSFORMERS_HOME"] = HF_CACHE
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pickle
import gdown
class Embedder:
    """Nearest-neighbour text search over a prebuilt FAISS index.

    The instance holds a sentence-transformer model used to embed query
    text, a FAISS index (``self.index``) and a metadata table
    (``self.metadata``). The metadata is indexed with ``.iloc`` and gets a
    ``'score'`` column assigned, so it is expected to be a pandas
    ``DataFrame`` whose row positions match the ids stored in the index.
    """

    def __init__(self, model_name='sentence-transformers/all-MiniLM-L6-v2'):
        """Load the embedding model; the index and metadata start unloaded.

        Args:
            model_name: Name passed to ``SentenceTransformer``.
        """
        self.model = SentenceTransformer(model_name)
        self.index = None  # set by load_from_files()
        self.metadata = None  # set by load_from_files()

    def download_file(self, url, out_path):
        """Download ``url`` to ``out_path`` unless that file already exists.

        An existing file is reused as-is and is never refreshed, so delete
        it manually to force a new download.

        Args:
            url: Source URL handed to ``gdown.download``.
            out_path: Destination path on the local filesystem.
        """
        if not os.path.exists(out_path):
            gdown.download(url, out_path, quiet=False)

    def load_from_files(self, index_path, metadata_path):
        """Load the FAISS index and the pickled metadata from local files.

        Both files are read before any attribute is assigned, so a failure
        while reading either one leaves the instance in its previous state
        instead of half-loaded.

        Args:
            index_path: Path to a file readable by ``faiss.read_index``.
            metadata_path: Path to the pickled metadata object.
        """
        index = faiss.read_index(index_path)
        # SECURITY: unpickling can execute arbitrary code. Only load
        # metadata files that come from a trusted source.
        with open(metadata_path, "rb") as f:
            metadata = pickle.load(f)
        self.index = index
        self.metadata = metadata

    def load_from_drive(self, index_url, metadata_url,
                        index_path="faiss_index.idx",
                        metadata_path="metadata.pkl"):
        """Download the index and metadata (if missing) and load them.

        Args:
            index_url: URL of the FAISS index file.
            metadata_url: URL of the pickled metadata file.
            index_path: Local path for the index file. The default keeps
                the original behaviour of writing to the current
                directory; pass another path if that is not writable.
            metadata_path: Local path for the metadata file.
        """
        self.download_file(index_url, index_path)
        self.download_file(metadata_url, metadata_path)
        self.load_from_files(index_path, metadata_path)

    def query(self, query_text, k=5):
        """Return the metadata rows for the ``k`` nearest neighbours.

        Args:
            query_text: Text to embed and search for.
            k: Maximum number of neighbours to return; must be positive.

        Returns:
            A copy of the matching metadata rows, in the order reported by
            the index, with an added ``'score'`` column holding the value
            the index returned for each hit (its meaning depends on the
            index's metric). Fewer than ``k`` rows are returned when the
            index cannot supply ``k`` neighbours.

        Raises:
            ValueError: If the index or metadata has not been loaded, or
                if ``k`` is not positive.
        """
        if self.index is None or self.metadata is None:
            raise ValueError("Index or metadata not loaded")
        if k <= 0:
            raise ValueError("k must be a positive integer")

        query_embedding = self.model.encode([query_text]).astype('float32')
        distances, indices = self.index.search(query_embedding, k)

        # FAISS pads missing neighbours with id -1. Passing -1 to .iloc
        # would silently select the LAST metadata row, so drop those slots.
        hit_ids = indices[0]
        valid = hit_ids >= 0

        results = self.metadata.iloc[hit_ids[valid]].copy()
        results['score'] = distances[0][valid]
        return results