"""Build and cache a FAISS vector store over the knowledge-base file."""

import os
from typing import Optional

from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

from rag.embeddings import get_embeddings


def build_vectorstore(
    filepath: str = "data/knowledge_base.md",
    *,
    chunk_size: int = 100,
    chunk_overlap: int = 20,
) -> FAISS:
    """Load the knowledge base, split it into chunks, and index it in FAISS.

    Args:
        filepath: Path to the knowledge-base text/Markdown file.
        chunk_size: Maximum chunk length handed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks.

    Returns:
        A FAISS vector store built from the chunks, embedded with the
        object returned by ``get_embeddings()``.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist.
        ValueError: If the file yields no chunks (e.g. it is empty).
    """
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"Knowledge base not found at {filepath}")

    # Pin the encoding: without it the loader falls back to the platform
    # default, which is not UTF-8 on every system.
    # NOTE(review): assumes the knowledge base is UTF-8 encoded -- confirm.
    loader = TextLoader(filepath, encoding="utf-8")
    docs = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n\n", "\n", " ", ""],
    )
    splits = text_splitter.split_documents(docs)

    # Fail with a clear message instead of letting the index construction
    # blow up on an empty list of documents.
    if not splits:
        raise ValueError(f"Knowledge base at {filepath} produced no text chunks")

    embeddings = get_embeddings()
    return FAISS.from_documents(splits, embeddings)


# Process-wide cache: the most recently built store and the absolute path of
# the file it was built from (None when the origin is unknown).
_vectorstore: Optional[FAISS] = None
_vectorstore_path: Optional[str] = None


def get_vectorstore(filepath: str = "data/knowledge_base.md") -> FAISS:
    """Return the cached vector store, building it on first use.

    The store is rebuilt when ``filepath`` differs from the path the cached
    store was built from, so a call with a new path never returns an index
    of a different file.

    Args:
        filepath: Path to the knowledge-base file to index.

    Returns:
        The cached (or freshly built) FAISS vector store.

    Raises:
        FileNotFoundError: If a build is needed and ``filepath`` is missing.
        ValueError: If a build is needed and the file yields no chunks.
    """
    global _vectorstore, _vectorstore_path

    requested_path = os.path.abspath(filepath)
    # A store with no recorded path (e.g. assigned to ``_vectorstore``
    # directly from outside this function) is served as-is for any path.
    built_from_other_file = (
        _vectorstore_path is not None and _vectorstore_path != requested_path
    )

    if _vectorstore is None or built_from_other_file:
        # Assign only after a successful build so a failure keeps the
        # previous cache intact.
        _vectorstore = build_vectorstore(filepath)
        _vectorstore_path = requested_path

    return _vectorstore