| | import os |
| | from dotenv import load_dotenv |
| | from datetime import datetime |
| | import time |
| | from langchain.llms import OpenAI |
| | from langchain.document_loaders import TextLoader |
| | from langchain.text_splitter import CharacterTextSplitter |
| | from langchain.embeddings.openai import OpenAIEmbeddings |
| | from langchain.chains import RetrievalQA |
| | from langchain.prompts import PromptTemplate |
| | from langchain.llms import OpenAI |
| | from langchain.vectorstores import FAISS |
| | from langchain.prompts import PromptTemplate |
| |
|
| | from interface import app |
| | import streamlit as st |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
class GenerateLearningPathIndexEmbeddings:
    """Chunk a Learning Path Index CSV, embed it with OpenAI, and build or
    load a local FAISS vector store.

    The whole pipeline runs from ``__init__``: load + chunk the CSV, create
    the embeddings client, then build (or load a cached) FAISS index.

    Parameters
    ----------
    csv_filename : str
        Name of the CSV file, resolved relative to the current working
        directory.
    """

    # On-disk cache folder for the FAISS index; delete it to force a rebuild.
    FAISS_FOLDER = "faiss_learning_path_index"

    def __init__(self, csv_filename):
        load_dotenv()  # pick up OPENAI_API_KEY from a .env file, if present
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        self.data_path = os.path.join(os.getcwd(), csv_filename)
        self.our_custom_data = None    # list of chunked Documents
        self.openai_embeddings = None  # OpenAIEmbeddings client
        self.faiss_vectorstore = None  # FAISS store (freshly built or cached)

        self.load_csv_data()
        self.get_openai_embeddings()
        self.create_faiss_vectorstore_with_csv_data_and_openai_embeddings()

    def load_csv_data(self):
        """Load the CSV as plain text and split it into ~1000-char chunks."""
        print(' -- Started loading .csv file for chunking purposes.')
        loader = TextLoader(self.data_path)
        document = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=30, separator="\n")
        self.our_custom_data = text_splitter.split_documents(document)
        # BUG FIX: log message said "spitting" instead of "splitting".
        print(f' -- Finished splitting (i.e. chunking) text (i.e. documents) from the .csv file (i.e. {self.data_path}).')

    def get_openai_embeddings(self):
        """Create the OpenAI embeddings client used to vectorise the chunks."""
        self.openai_embeddings = OpenAIEmbeddings(openai_api_key=self.openai_api_key, request_timeout=60)

    def create_faiss_vectorstore_with_csv_data_and_openai_embeddings(self):
        """Build a FAISS vector store from the chunks, or load the cached one.

        BUG FIX: previously ``self.faiss_vectorstore`` was only assigned on
        the cache-load path, so a fresh build left it ``None``; the freshly
        built store is now kept in memory as well as saved to disk.
        """
        faiss_vectorstore_foldername = self.FAISS_FOLDER
        if not os.path.exists(faiss_vectorstore_foldername):
            print(' -- Creating a new FAISS vector store from chunked text and OpenAI embeddings.')
            self.faiss_vectorstore = FAISS.from_documents(self.our_custom_data, self.openai_embeddings)
            self.faiss_vectorstore.save_local(faiss_vectorstore_foldername)
            print(f' -- Saved the newly created FAISS vector store at "{faiss_vectorstore_foldername}".')
        else:
            print(f' -- WARNING: Found existing FAISS vector store at "{faiss_vectorstore_foldername}", loading from cache.')
            print(f' -- NOTE: Delete the FAISS vector store at "{faiss_vectorstore_foldername}", if you wish to regenerate it from scratch for the next run.')
            # CONSISTENCY FIX: was a second hard-coded copy of the folder name.
            self.faiss_vectorstore = FAISS.load_local(
                faiss_vectorstore_foldername, self.openai_embeddings
            )

    def get_faiss_vector_store(self):
        """Return the FAISS vector store built or loaded during ``__init__``."""
        return self.faiss_vectorstore
| |
|
| |
|
| | |
def running_inside_streamlit():
    """Detect whether this code is executing under the Streamlit runtime.

    Returns
    -------
    bool
        True when an active Streamlit script-run context exists; False when
        there is no context or Streamlit is not installed at all.
    """
    try:
        from streamlit.runtime.scriptrunner import get_script_run_ctx
    except ModuleNotFoundError:
        return False
    return bool(get_script_run_ctx())
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
class GenAILearningPathIndex:
    """Question-answering front-end over the Learning Path Index FAISS store.

    Wraps an OpenAI LLM and a retrieval prompt so that ``get_response_for``
    answers queries using chunks retrieved from the supplied vector store.
    """

    def __init__(self, faiss_vectorstore):
        # faiss_vectorstore: a FAISS store built from the Learning Path Index
        # CSV (see GenerateLearningPathIndexEmbeddings elsewhere in this file).
        load_dotenv()  # pick up OPENAI_API_KEY from a .env file, if present
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        self.faiss_vectorstore = faiss_vectorstore

        # Prompt instructs the model to answer as a 4-row markdown table with
        # a link per row; {context} is filled by the retriever, {question} by
        # the caller's query.
        prompt_template = \
        """
        Use the following template to answer the question at the end,
        from the Learning Path Index csv file,
        display top 4 results in a tablular format and it
        should look like this:
        | Learning Pathway | duration | link | Module |
        | --- | --- | --- | --- |
        | ... | ... | ... | ... |
        it must contain a link for each line of the result in a table,
        consider the duration and Module information mentioned in the question,
        If you don't know the answer, don't make an entry in the table,
        {context}
        Question: {question}
        """
        PROMPT = PromptTemplate(template=prompt_template, input_variables=["context","question"])

        # Passed through to RetrievalQA so the chain uses our custom prompt.
        self.chain_type_kwargs = {"prompt": PROMPT}

        # temperature=1.0: fully creative sampling — NOTE(review): unusually
        # high for retrieval QA; confirm this is intentional.
        self.llm = OpenAI(temperature=1.0, openai_api_key=self.openai_api_key)

    def get_response_for(self, query: str):
        """Run a retrieval-augmented QA pass over the vector store.

        Parameters
        ----------
        query : str
            The user's natural-language question.

        Returns
        -------
        str
            The LLM's answer produced by the "stuff" RetrievalQA chain.
        """
        qa = RetrievalQA.from_chain_type(
            llm=self.llm, chain_type="stuff",
            retriever=self.faiss_vectorstore.as_retriever(),
            chain_type_kwargs=self.chain_type_kwargs
        )
        return qa.run(query)
| |
|
def get_formatted_time(current_time=None):
    """Format a POSIX timestamp as a UTC ``YYYY-MM-DD HH:MM:SS`` string.

    Parameters
    ----------
    current_time : float, optional
        Seconds since the epoch. Defaults to the time of the *call*.

        BUG FIX: the original used ``current_time=time.time()`` as the
        default, which Python evaluates once at import time — every default
        call reported the module-load time instead of "now".

    Returns
    -------
    str
        The timestamp rendered in UTC.
    """
    if current_time is None:
        current_time = time.time()
    return datetime.utcfromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')
| |
|
| | |
@st.cache_data
def load_model(csv_filename="Learning_Pathway_Index.csv"):
    """Build (or fetch the cached) FAISS vector store for the index CSV.

    Cached by Streamlit (``st.cache_data``) so the expensive embedding step
    runs once per session per filename.

    Parameters
    ----------
    csv_filename : str, optional
        CSV file to embed. Defaults to the project's Learning Pathway Index,
        so existing ``load_model()`` callers are unaffected
        (GENERALIZATION: the filename was previously hard-coded).

    Returns
    -------
    FAISS
        The vector store produced by GenerateLearningPathIndexEmbeddings.
    """
    start_time = time.time()
    print(f"\nStarted loading custom embeddings (created from .csv file) at {get_formatted_time(start_time)}")
    learningPathIndexEmbeddings = GenerateLearningPathIndexEmbeddings(csv_filename)
    faiss_vectorstore = learningPathIndexEmbeddings.get_faiss_vector_store()
    end_time = time.time()
    print(f"Finished loading custom embeddings (created from .csv file) at {get_formatted_time(end_time)}")
    print(f"Custom embeddings (created from .csv file) took about {end_time - start_time} seconds to load.")
    return faiss_vectorstore
| |
|
| | |
def query_gpt_model(query: str, vectorstore=None):
    """Answer *query* against the Learning Path Index vector store.

    Parameters
    ----------
    query : str
        The user's natural-language question.
    vectorstore : FAISS, optional
        Vector store to search. Defaults to the module-level
        ``faiss_vectorstore`` global for backward compatibility
        (ROBUSTNESS: the original depended *only* on that global, which is
        bound solely when the file runs as a script).

    Returns
    -------
    str
        The model's answer.
    """
    start_time = time.time()
    print(f"\nQuery processing start time: {get_formatted_time(start_time)}")
    if vectorstore is None:
        # Fall back to the global created in the __main__ block below.
        vectorstore = faiss_vectorstore
    genAIproject = GenAILearningPathIndex(vectorstore)
    answer = genAIproject.get_response_for(query)
    end_time = time.time()
    print(f"\nQuery processing finish time: {get_formatted_time(end_time)}")
    print(f"\nAnswer (took about {end_time - start_time} seconds)")
    return answer
| |
|
| |
|
if __name__ == '__main__':
    # Build (or load) the vector store once; query_gpt_model reads this global.
    faiss_vectorstore = load_model()

    if running_inside_streamlit():
        # Streamlit path: the interface module supplies the query widget.
        print("\nStreamlit environment detected. \nTo run a CLI interactive version just run `python main.py` in the CLI.\n")
        query_from_stream_list = app()
        if query_from_stream_list:
            st.write(query_gpt_model(query_from_stream_list))
    else:
        # Plain-terminal path: simple read-eval-print loop until "exit".
        print("\nCommand-line interactive environment detected.\n")
        while True:
            query = input("\nEnter a query: ")
            if query == "exit":
                break
            if not query.strip():
                continue
            answer = query_gpt_model(query)
            print("\n\n> Question:")
            print(query)
            print(answer)
| |
|