import os
from dotenv import load_dotenv
from langchain import OpenAI
from langchain.document_loaders import GitLoader
from langchain.vectorstores import VectorStore, Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain

REPO_PATH = "out/ohmyzsh-wiki"
OHMYZSH_WIKI_URL = "https://github.com/ohmyzsh/wiki"
PRESISTENT_PATH = ".vdb"
DEBUG = True

memory = None

print('this is memory.py')
load_dotenv()

class Memory: 
    _instance = None
    vectordb = None

    def __init__(self):
        if self._instance is not None:
            raise Exception("Memory is a singleton")
        else:
            Memory._instance = self
            self.vectordb = self.update_memory()
    
    @staticmethod
    def get_instance():
        if Memory._instance is None:
            Memory()
        return Memory._instance
    

    # TODO - make it singleton and load it once
    def query(self, query):
        if self.vectordb is None:
            self.vectordb = Chroma(persist_directory=PRESISTENT_PATH, embedding_function=OpenAIEmbeddings())

        context_docs = self.vectordb.as_retriever().get_relevant_documents(query)
        chain = load_qa_chain(llm=OpenAI(), chain_type="stuff")
        answer = chain.run(input_documents=context_docs, question=query)

        if DEBUG:
            print(f"Found {len(context_docs)} relevant documents")
            print(f"Answer: {answer}")

        return answer
    
    ## Private methods
    def update_memory(self):
        docs = self._sniff_github(clone_url=None if os.path.exists(REPO_PATH) else OHMYZSH_WIKI_URL, branch="main", file_ext="md")
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
        doc_chunks = text_splitter.split_documents(docs)
        vectordb = Chroma.from_documents(documents=doc_chunks, embedding=OpenAIEmbeddings(), persist_directory=PRESISTENT_PATH)
        vectordb.persist()
        if DEBUG:
            print(f"[[Memory Update]]. Added {len(doc_chunks)} vectors to the vector store.")
        return vectordb
    
    def _sniff_github(self, clone_url=None, branch="main", file_ext=None):
        loader = GitLoader(repo_path="out/ohmyzsh-wiki",
                        clone_url=clone_url,
                        branch=branch, 
                        file_filter= lambda file_path: file_path.endswith(f".{file_ext}")) if file_ext else None

        documents = loader.load()
        if DEBUG:
            print(f"Found {len(documents)} documents in repo\n")    
        return documents

def get_memory():
    global memory
    if memory is None:
        return Memory.get_instance()
    else:
        return memory

def main():
    memory = Memory.get_instance()
    print(memory.query("how to install ohmyzsh"))

if __name__ == "__main__":
    main()