Spaces:

agnixcode
/

bottttt

Sleeping

App Files Files Community

bottttt / app.py

agnixcode

Create app.py

fcbf118 verified 8 months ago

raw

history blame contribute delete

2.93 kB

	import os
	import gradio as gr
	import fitz # PyMuPDF
	from sentence_transformers import SentenceTransformer
	import chromadb
	from chromadb.utils import embedding_functions
	import openai

	# Route the OpenAI client to Groq's OpenAI-compatible endpoint; the key is
	# read from the GROQ_API_KEY environment variable (None when unset).
	openai.api_base = "https://api.groq.com/openai/v1"
	openai.api_key = os.environ.get("GROQ_API_KEY")

	# Sentence-embedding model shared by indexing and querying.
	embedder = SentenceTransformer("all-MiniLM-L6-v2")

	# ChromaDB client configured with a persistence directory, plus the
	# collection that holds the paper chunks.
	# NOTE(review): whether data actually survives restarts depends on the
	# installed ChromaDB version and its Settings defaults - confirm.
	persist_path = "./chroma_db"
	chroma_settings = chromadb.config.Settings(persist_directory=persist_path)
	db = chromadb.Client(chroma_settings)
	collection = db.get_or_create_collection("papers")

	# Extract text from uploaded PDF
	def extract_text_from_pdf(file):
	    """Return the concatenated text of every page of a PDF.

	    Args:
	        file: The PDF content, either raw ``bytes``/``bytearray`` (what a
	            ``gr.File(type="binary")`` component hands to its callback) or
	            a binary file-like object exposing ``read()``.

	    Returns:
	        The text of all pages joined in page order; an empty string when
	        no page yields any text.
	    """
	    # Raw bytes have no .read(); only call it on file-like objects.
	    data = file if isinstance(file, (bytes, bytearray)) else file.read()
	    # The context manager closes the document even if a page fails to parse.
	    with fitz.open(stream=data, filetype="pdf") as doc:
	        return "".join(page.get_text() for page in doc)

	# Chunk and store in vector DB
	def chunk_and_store(text):
	    """Split ``text`` into 500-character chunks and add them to the collection.

	    Each chunk is embedded with the module-level ``embedder`` and stored in
	    ``collection`` together with its embedding under a freshly generated
	    unique ID. Nothing is stored when ``text`` is empty.

	    Args:
	        text: The full document text to index.
	    """
	    import uuid

	    chunk_size = 500
	    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
	    if not chunks:
	        # Empty text produces no chunks; there is nothing to embed or store.
	        return

	    embeddings = embedder.encode(chunks).tolist()
	    # Random IDs cannot collide with IDs written by earlier uploads, and
	    # they avoid re-reading the whole collection to derive a counter.
	    ids = [f"id_{uuid.uuid4().hex}" for _ in chunks]
	    # One batched insert instead of one add() call per chunk.
	    collection.add(documents=chunks, ids=ids, embeddings=embeddings)
	    db.persist()

	# Retrieve relevant chunks and send to LLaMA3 via Groq
	def retrieve_and_ask(query):
	    """Answer ``query`` using the most similar stored chunks as context.

	    The query is embedded with the module-level ``embedder``, up to three
	    nearest chunks are fetched from ``collection``, and the chunks plus the
	    question are sent to the ``llama3-70b-8192`` chat model.

	    Args:
	        query: The user's question.

	    Returns:
	        The model's answer, ``"Please upload a paper first."`` when the
	        collection is empty, or an ``"Error: ..."`` string when the chat
	        request (or reading its response) raises.
	    """
	    # count() tests for emptiness without loading every stored document.
	    stored_chunks = collection.count()
	    if stored_chunks == 0:
	        return "Please upload a paper first."

	    query_embedding = embedder.encode([query]).tolist()[0]
	    # Never ask for more neighbours than exist.
	    # NOTE(review): older ChromaDB releases reportedly raise when n_results
	    # exceeds the collection size - confirm against the pinned version.
	    results = collection.query(
	        query_embeddings=[query_embedding],
	        n_results=min(3, stored_chunks),
	    )
	    context = "\n".join(results["documents"][0])

	    system_prompt = "You are an academic assistant helping students understand research papers."
	    user_prompt = f"Based on the following context:\n{context}\n\nAnswer the question:\n{query}"

	    try:
	        response = openai.ChatCompletion.create(
	            model="llama3-70b-8192",
	            messages=[
	                {"role": "system", "content": system_prompt},
	                {"role": "user", "content": user_prompt},
	            ],
	        )
	        return response['choices'][0]['message']['content']
	    except Exception as e:
	        # UI boundary: show the failure as text instead of crashing the app.
	        return f"Error: {str(e)}"

	# Gradio event handlers (callbacks wired to the UI defined below)
	def handle_upload(file):
	    """Gradio callback: index an uploaded PDF and return a status message.

	    Args:
	        file: The value delivered by the upload component, or ``None`` when
	            nothing was uploaded.

	    Returns:
	        A human-readable status string: a prompt to upload a file, a notice
	        that no text could be extracted, an ``"Error: ..."`` message when
	        extraction or indexing raises, or the success message.
	    """
	    if file is None:
	        return "Upload a valid PDF file."
	    try:
	        text = extract_text_from_pdf(file)
	        if not text.strip():
	            # Do not claim success when nothing could be indexed.
	            return "No extractable text was found in the PDF."
	        chunk_and_store(text)
	    except Exception as e:
	        # UI boundary: report failures as text, matching retrieve_and_ask.
	        return f"Error: {str(e)}"
	    return "✅ Paper uploaded and processed."

	def handle_query(query):
	    """Gradio callback for the "Ask" button: return the answer for ``query``.

	    Delegates entirely to ``retrieve_and_ask``.
	    """
	    answer = retrieve_and_ask(query)
	    return answer

	# Page layout: a heading, an upload row and a question/answer row.
	with gr.Blocks() as demo:
	    gr.Markdown("### 📘 RAG Academic Assistant\nUpload a paper and ask questions.")

	    # Upload row: PDF picker, trigger button, status box.
	    with gr.Row():
	        pdf_input = gr.File(label="Upload PDF", type="binary")
	        upload_btn = gr.Button("Process")
	        upload_output = gr.Textbox()

	    # Question row: question box, answer box, trigger button.
	    with gr.Row():
	        question_box = gr.Textbox(label="Ask a question")
	        answer_box = gr.Textbox(label="Answer")
	        ask_btn = gr.Button("Ask")

	    # Wire each button to its callback.
	    upload_btn.click(fn=handle_upload, inputs=[pdf_input], outputs=[upload_output])
	    ask_btn.click(fn=handle_query, inputs=[question_box], outputs=[answer_box])

	# Start the web server once the layout is fully defined.
	demo.launch()