# Note_Retriever / app.py
# ZacBl's picture
# Optimisation for faster exec
# 79a5c64 verified
import streamlit as st
from st_copy_to_clipboard import st_copy_to_clipboard
import re
import numpy as np
from doc_preprocessing import process_files, get_embeddings
from vector_DB import VectorDatabase # Import the class
from llm_interaction import get_answer
# Module-level vector store used by process_query() and filled by main().
# NOTE(review): the original comment describes it as FAISS-backed; the class
# lives in vector_DB, so confirm the backend there.
vector_database = VectorDatabase()  # single instance shared by the functions below
# Metadata returned by process_files(); main() rebinds this name via `global`.
chunks_metadata = []
@st.cache_data
def _embed_query(query):
    """Return the embedding of ``query``, memoised by Streamlit per query text.

    Only the embedding is cached because it depends on nothing but the query
    string; the search itself also depends on what is currently indexed.
    NOTE(review): assumes the embedding returned by get_embeddings() can be
    serialised by st.cache_data (e.g. a NumPy array) -- confirm.
    """
    return get_embeddings([query])[0]


def process_query(query, k=10):
    """Search the vector database for the chunks closest to ``query``.

    Args:
        query: The user's question as plain text.
        k: Maximum number of results requested from the database.

    Returns:
        The message ``"Please upload files first."`` when the database is
        empty, otherwise whatever ``vector_database.query`` returns for the
        query embedding.
    """
    # This function is deliberately NOT wrapped in st.cache_data: the cache
    # key would be the query alone, so results (or the "upload files" message)
    # would be replayed unchanged after the indexed documents change.
    if vector_database.is_empty():
        return "Please upload files first."

    print('Query:', query)
    query_embedding = _embed_query(query)
    print('Asking Queries..................')
    return vector_database.query(query_embedding, k=k)
def normalize_line_breaks(text):
    """Swap each literal backslash-n pair in ``text`` for a space-padded newline.

    Only the two-character escape sequence (a backslash followed by ``n``) is
    affected; real newline characters already present are left untouched.
    """
    escaped_newline = "\\n"
    pieces = text.split(escaped_newline)
    return " \n ".join(pieces)
def _render_result(result, chunks, cpt, overlap):
    """Draw one search hit: heading, source line, context buttons and excerpt."""
    chunk_text = result['chunk_text']
    chunk_index = result['chunk_index']

    st.subheader(f"Réponse {cpt} :")
    # Lower raw scores are the ones kept by the caller, so the score is shown
    # as 1 / (1 + score), expressed as a percentage.
    shown_score = round(1. / (1 + result['score']) * 100, 2)
    st.write(f"Source File: {result['file_name']}, Score: {shown_score}%")

    text_to_display = chunk_text
    col1, col2 = st.columns(2)

    with col1:
        previous_index = chunk_index - 1
        # Offer the button only when a previous chunk actually exists.
        if 0 <= previous_index < len(chunks):
            if st.button("Ajouter la portion de texte précédente", key=f"before_{cpt}"):
                previous_text = chunks[previous_index]
                # Drop the last `overlap` characters of the previous chunk
                # (presumably the part repeated at the start of this chunk --
                # confirm against the chunking in doc_preprocessing).
                keep = max(len(previous_text) - overlap, 0)
                text_to_display = previous_text[:keep] + chunk_text

    with col2:
        next_index = chunk_index + 1
        # The lower bound prevents a negative index from silently wrapping
        # around to the end of `chunks`.
        if 0 <= next_index < len(chunks):
            if st.button("Ajouter la portion de texte suivante", key=f"after_{cpt}"):
                # Skip the first `overlap` characters of the next chunk.
                text_to_display = chunk_text + chunks[next_index][overlap:]

    st.write("Citations depuis le document :")
    normalized_text = normalize_line_breaks(text_to_display)
    st.write(normalized_text)
    st_copy_to_clipboard(normalized_text)


def display_results(results, chunks, max_score=0.5, overlap=50):
    """Render the search results whose score is below ``max_score``.

    Args:
        results: Iterable of result mappings with the keys ``'score'``,
            ``'file_name'``, ``'chunk_text'`` and ``'chunk_index'``, or the
            plain message string that process_query() returns when nothing is
            indexed.
        chunks: Sequence of chunk texts that ``result['chunk_index']`` indexes
            into; used to prepend or append the neighbouring chunk on request.
            NOTE(review): assumes chunk_index is a position in this same
            sequence -- confirm against process_files().
        max_score: Results with a score greater than or equal to this value
            are skipped.
        overlap: Number of characters trimmed from a neighbouring chunk where
            it is joined to the displayed chunk.
    """
    # A string is a status message, not a result list; iterating over it
    # would fail on result['score'].
    if isinstance(results, str):
        st.warning(results)
        return

    cpt = 1  # numbers the displayed answers and keeps widget keys unique
    for result in results:
        if result['score'] < max_score:
            _render_result(result, chunks, cpt, overlap)
            cpt += 1
def main():
    """Build the page: upload documents, index them, then answer a query.

    The uploaded files are passed to process_files() and the resulting chunks,
    embeddings and metadata are added to the module-level ``vector_database``.
    When a query is entered, its results are rendered with display_results().
    """
    global chunks_metadata

    st.title("Document Query App")
    uploaded_files = st.file_uploader(
        "Upload PDF or Word files", accept_multiple_files=True, type=["pdf", "docx"]
    )
    query = st.text_input("Enter your query:")

    if not uploaded_files:
        # Without files there are no chunks to search or display; say so
        # instead of failing on the unbound chunk list or staying silent.
        if query:
            st.warning("Please upload files first.")
        return

    all_chunks, all_embeddings, chunks_metadata = process_files(uploaded_files)
    vector_database.add_data(all_embeddings, all_chunks, chunks_metadata)
    st.session_state.files_processed = True

    if not query:
        return

    results = process_query(query)
    if isinstance(results, str):
        # process_query() reports an empty database with a plain message
        # rather than a list of results.
        st.warning(results)
        return
    display_results(results, all_chunks)
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()