| import streamlit as st |
| from langchain.vectorstores import FAISS |
| from langchain.document_loaders.csv_loader import CSVLoader |
| from langchain.embeddings import HuggingFaceInstructEmbeddings |
| from langchain.prompts import PromptTemplate |
| from langchain.chains import RetrievalQA |
| from langchain.llms import OpenAI |
| import os |
| from dotenv import load_dotenv |
|
|
# Load variables from a local .env file (if present) into the process
# environment so secrets can be supplied without appearing in the source.
load_dotenv()

# The API key is read from the environment instead of being hard-coded:
# a key embedded in source is exposed to anyone who can read the file.
# NOTE(review): the key that was previously written here should be treated
# as leaked and revoked with the provider.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Define it in the environment or in a .env file."
    )

# LLM client configured against the Groq OpenAI-style endpoint below.
llm = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=groq_api_key,
    model_name="llama3-8b-8192",
    temperature=0.1,
)
|
|
# Directory the FAISS index is saved to by create_vector_db() and loaded
# from by get_qa_chain().
vectordb_file_path = "faiss_index"

# Embedding model shared by index creation and index loading, so both sides
# use the same embeddings.
embedding_model = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
)
|
|
def create_vector_db():
    """Build a FAISS index from the FAQ CSV and save it to disk.

    Loads ``codebasics_faqs.csv`` with the ``prompt`` column passed as the
    loader's ``source_column``, embeds the loaded documents with the
    module-level ``embedding_model`` and writes the index to
    ``vectordb_file_path``.
    """
    faq_documents = CSVLoader(
        file_path='codebasics_faqs.csv',
        source_column="prompt",
    ).load()

    faiss_index = FAISS.from_documents(
        documents=faq_documents,
        embedding=embedding_model,
    )
    faiss_index.save_local(vectordb_file_path)
|
|
def get_qa_chain():
    """Build a RetrievalQA chain over the FAISS index saved on disk.

    The index is loaded from ``vectordb_file_path`` with the module-level
    ``embedding_model``, exposed as a retriever, and combined with the
    module-level ``llm`` through a "stuff" chain that uses a prompt
    restricting answers to the retrieved context.

    Returns:
        A ``RetrievalQA`` chain that is called with ``{"query": <question>}``
        and is configured to return the source documents with the answer.
    """
    # NOTE(review): newer LangChain releases may require
    # allow_dangerous_deserialization=True here -- confirm against the
    # version this project pins before adding it.
    vectordb = FAISS.load_local(vectordb_file_path, embedding_model)

    # A bare ``score_threshold=0.7`` keyword is not a retriever option, so
    # the threshold was never applied. The threshold has to be requested
    # through ``search_type`` and passed inside ``search_kwargs``.
    retriever = vectordb.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.7},
    )

    prompt_template = """Given the following context and a question, generate an answer based on this context only.
In the answer try to provide as much text as possible from "response" section in the source document context without making much changes.
If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer.

CONTEXT: {context}

QUESTION: {question}"""

    PROMPT = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )

    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        input_key="query",
        chain_type_kwargs={"prompt": PROMPT},
    )
    return chain
|
|
| |
# Streamlit page: build the vector index if needed, then answer questions.
# NOTE(review): the leading character in the title looks like a mis-encoded
# emoji -- confirm the intended character before changing the string.
st.title("๐ Ask Questions About Your CSV")

# Build the index only when no saved index file exists yet.
index_missing = not os.path.exists(f"{vectordb_file_path}/index.faiss")
if index_missing:
    with st.spinner("Creating vector DB..."):
        create_vector_db()

question = st.text_input("Enter your question:")
if question:
    # The chain is built per question and called with the "query" input key.
    answer = get_qa_chain()({"query": question})

    st.write("### Answer:")
    st.write(answer["result"])

    # Show the retrieved documents so the answer can be checked against them.
    with st.expander("Show Source Document(s)"):
        for source_doc in answer["source_documents"]:
            st.markdown(f"**Source:** {source_doc.metadata}")
            st.text(source_doc.page_content)
|
|