Spaces:

ZacBl
/

Note_Retriever

Sleeping

App Files Files Community

Note_Retriever / app.py

ZacBl

Optimisation for faster exec

79a5c64 verified about 1 year ago

raw

history blame contribute delete

3.28 kB

	import streamlit as st
	from st_copy_to_clipboard import st_copy_to_clipboard
	import re
	import numpy as np
	from doc_preprocessing import process_files, get_embeddings
	from vector_DB import VectorDatabase # Import the class
	from llm_interaction import get_answer

	# Metadata describing the indexed chunks; main() rebinds this global after
	# the uploaded files have been processed.
	chunks_metadata = []
	# Shared vector store (FAISS-backed, per the original author's note) that
	# main() fills and process_query() searches.
	vector_database = VectorDatabase()



	@st.cache_data
	def process_query(query, k=10):
	    """Return the ``k`` nearest stored chunks for ``query``.

	    Args:
	        query: Free-text question typed by the user.
	        k: Number of neighbours requested from the vector database.

	    Returns:
	        The value of ``vector_database.query`` for the embedded query, or the
	        string ``"Please upload files first."`` when the database reports
	        itself empty. Callers must handle the string case.
	    """
	    # NOTE(review): st.cache_data builds its key from the arguments (and the
	    # function source), not from the contents of ``vector_database``. The same
	    # query can therefore return results computed against a previously
	    # indexed set of files -- confirm this is acceptable or clear the cache
	    # when the index changes.
	    if vector_database.is_empty():
	        return "Please upload files first."

	    print('Query:', query)
	    # get_embeddings works on a batch; embed the single query and unwrap it.
	    query_embedding = get_embeddings([query])[0]
	    print('Asking Queries..................')
	    return vector_database.query(query_embedding, k=k)

	def normalize_line_breaks(text: str) -> str:
	    """Replace escaped line breaks with real ones, padded by spaces.

	    Every literal backslash-n pair (two characters) in ``text`` becomes
	    space + newline + space. Real newline characters are left untouched.

	    Args:
	        text: The string to clean up before display.

	    Returns:
	        A new string with the replacements applied.
	    """
	    return text.replace("\\n", " \n ")

	def display_results(results, chunks, *, max_score=0.5, overlap=50):
	    """Render every sufficiently close search hit with optional context.

	    For each hit whose ``score`` is below ``max_score`` this writes a numbered
	    heading, the source file name with a percentage derived from the score,
	    two buttons that prepend/append the neighbouring chunk, the resulting
	    text, and a copy-to-clipboard widget.

	    Args:
	        results: Iterable of dicts with the keys ``score``, ``file_name``,
	            ``chunk_text`` and ``chunk_index``. A plain string (the message
	            ``process_query`` returns for an empty database) is shown as a
	            warning instead of being iterated.
	        chunks: Sequence of chunk texts indexed by ``chunk_index``.
	        max_score: Hits with a score at or above this value are skipped.
	        overlap: Number of characters trimmed where two neighbouring chunks
	            are joined. Presumably this mirrors the chunk overlap used in
	            ``doc_preprocessing`` -- TODO confirm.
	    """
	    # process_query signals "nothing indexed yet" with a string; iterating it
	    # would fail on result['score'], so surface it to the user instead.
	    if isinstance(results, str):
	        st.warning(results)
	        return

	    cpt = 0
	    for result in results:
	        if not (result['score'] < max_score):
	            continue
	        # Number only the hits actually shown; also keeps widget keys unique.
	        cpt += 1

	        st.subheader(f"Réponse {cpt} :")
	        percentage = round(1. / (1 + result['score']) * 100, 2)
	        st.write(f"Source File: {result['file_name']}, Score: {percentage}%")

	        chunk_index = result['chunk_index']
	        text_to_display = result['chunk_text']
	        col1, col2 = st.columns(2)

	        with col1:
	            previous_chunk_index = chunk_index - 1
	            # Explicit bounds check replaces the former silent IndexError catch.
	            if 0 <= previous_chunk_index < len(chunks):
	                if st.button("Ajouter la portion de texte précédente", key=f"before_{cpt}"):
	                    previous_chunk_text = chunks[previous_chunk_index]
	                    # Drop the tail of the previous chunk; computed from the
	                    # length so that overlap=0 keeps the whole chunk.
	                    keep = max(len(previous_chunk_text) - overlap, 0)
	                    text_to_display = previous_chunk_text[:keep] + result['chunk_text']

	        with col2:
	            next_chunk_index = chunk_index + 1
	            # Reject negative indices too, so a bad chunk_index cannot wrap
	            # around to the end of the list.
	            if 0 <= next_chunk_index < len(chunks):
	                if st.button("Ajouter la portion de texte suivante", key=f"after_{cpt}"):
	                    next_chunk_text = chunks[next_chunk_index]
	                    text_to_display = result['chunk_text'] + next_chunk_text[overlap:]

	        st.write("Citations depuis le document :")
	        # Normalise once and reuse for both the page and the clipboard widget.
	        rendered_text = normalize_line_breaks(text_to_display)
	        st.write(rendered_text)
	        st_copy_to_clipboard(rendered_text)


	def main():
	    """Build the page: upload documents, index them, and answer one query.

	    Uploaded files are processed and added to the module-level
	    ``vector_database``; the module-level ``chunks_metadata`` is rebound to
	    the metadata returned by ``process_files``. When a query is present its
	    results are rendered with ``display_results``.
	    """
	    global chunks_metadata

	    st.title("Document Query App")

	    uploaded_files = st.file_uploader(
	        "Upload PDF or Word files", accept_multiple_files=True, type=["pdf", "docx"]
	    )
	    query = st.text_input("Enter your query:")

	    if not uploaded_files:
	        # Nothing is indexed yet, and all_chunks below would be undefined.
	        if query:
	            st.warning("Please upload files first.")
	        return

	    all_chunks, all_embeddings, chunks_metadata = process_files(uploaded_files)
	    vector_database.add_data(all_embeddings, all_chunks, chunks_metadata)

	    st.session_state.files_processed = True

	    if not query:
	        return

	    results = process_query(query)
	    # process_query returns a plain message string when the database is
	    # empty; show it rather than treating it as a list of hits.
	    if isinstance(results, str):
	        st.warning(results)
	        return
	    display_results(results, all_chunks)

	# Run the app only when this file is executed as the entry-point script.
	if __name__ == "__main__":
	    main()