import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_community.chat_models import ChatLiteLLM
from langchain_core.messages import HumanMessage, AIMessage


class ProjectRAGEngine:
    """RAG engine over PDF project documents.

    Pipeline: load PDFs -> chunk -> embed into a FAISS index -> answer
    questions with an OpenRouter-hosted chat model, grounding every answer
    in the retrieved chunks and returning those chunks for citation.
    """

    def __init__(self):
        # Local CPU embedding model; normalized vectors so similarity
        # search behaves like cosine similarity.
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
        )

        # Chat model served through OpenRouter's OpenAI-compatible API.
        # NOTE(review): assumes OPENROUTER_API_KEY is set in the environment;
        # construction/calls will fail if it is missing — confirm deployment
        # guarantees this.
        self.llm = ChatOpenAI(
            model="openai/gpt-oss-120b:free",
            base_url="https://openrouter.ai/api/v1",
            api_key=os.getenv("OPENROUTER_API_KEY"),
            extra_body={"reasoning": {"enabled": True}},
        )

        # Populated by process_documents(); None until documents are indexed.
        self.vector_store = None

    def process_documents(self, pdf_paths):
        """Load, chunk, and index the given PDF files into FAISS.

        Args:
            pdf_paths: iterable of filesystem paths to PDF files.

        Raises:
            ValueError: if no text could be extracted from the PDFs
                (e.g. empty path list or image-only scans).
        """
        all_docs = []
        for path in pdf_paths:
            loader = PyPDFLoader(path)
            all_docs.extend(loader.load())

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50,
        )
        splits = splitter.split_documents(all_docs)

        # FAISS.from_documents fails cryptically on an empty list; raise a
        # clear, actionable error instead.
        if not splits:
            raise ValueError("No text could be extracted from the provided PDFs.")

        self.vector_store = FAISS.from_documents(splits, self.embeddings)

    def _format_docs(self, docs):
        """Render retrieved chunks as a single context string.

        Each chunk is prefixed with its source file name and page number so
        the model can honor the prompt's instruction to cite document names
        and pages (previously the metadata was dropped, making citation
        impossible).
        """
        formatted = []
        for d in docs:
            source = os.path.basename(str(d.metadata.get("source", "unknown")))
            page = d.metadata.get("page", "?")
            formatted.append(f"[Source: {source}, Page: {page}]\n{d.page_content}")
        return "\n\n".join(formatted)

    def get_answer(self, query):
        """Answer `query` using only the indexed documents.

        Args:
            query: the user's question as a string.

        Returns:
            (answer, sources): `answer` is the model's reply string;
            `sources` is a list of {"content": ..., "metadata": ...} dicts
            for the retrieved chunks. If no documents are indexed yet,
            returns a prompt-to-upload message and an empty list.
        """
        # Explicit None check: the sentinel set in __init__, not truthiness
        # of an arbitrary vectorstore object.
        if self.vector_store is None:
            return "Please upload documents first.", []

        template = """
You are a professional Project Analyst.
Answer strictly using the context.
If unknown, say you don't know.
Cite document names and page numbers.
Context:
{context}
Question:
{question}
"""

        prompt = ChatPromptTemplate.from_template(template)
        # MMR retrieval diversifies the 5 returned chunks (lambda_mult=0.25
        # weighs diversity heavily over pure relevance).
        retriever = self.vector_store.as_retriever(
            search_type="mmr", search_kwargs={"k": 5, "lambda_mult": 0.25}
        )

        # Inner chain: format the retrieved docs into the prompt's
        # {context} slot, then run LLM -> plain string.
        rag_chain = (
            RunnablePassthrough.assign(
                context=lambda x: self._format_docs(x["context"])
            )
            | prompt
            | self.llm
            | StrOutputParser()
        )

        # Outer chain: retrieve docs and carry the raw Document list
        # alongside the answer so callers get citable sources.
        chain = RunnableParallel(
            {"context": retriever, "question": RunnablePassthrough()}
        ).assign(answer=rag_chain)

        result = chain.invoke(query)

        sources = [
            {"content": d.page_content, "metadata": d.metadata}
            for d in result["context"]
        ]

        return result["answer"], sources