# ML_Tutor / app.py
import requests
import fitz  # PyMuPDF
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from groq import Groq
import gradio as gr
import os
# =========================
# 1. LOAD API KEY (HF SECRET)
# =========================
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
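# Fail fast if the secret is missing (a startup-time guard; assumption: clearer to
# error here than at the first chat request).
if not GROQ_API_KEY:
    raise RuntimeError("GROQ_API_KEY is not set. Add it as a Space secret.")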
client = Groq(api_key=GROQ_API_KEY)
# =========================
# 2. LOAD PDF
# =========================
pdf_url = "https://huggingface.co/datasets/HuzaifaTech/rag_file/resolve/main/Hands_On_Machine_Learning_with_Scikit_Le.pdf"
pdf_path = "file.pdf"
if not os.path.exists(pdf_path):
    response = requests.get(pdf_url, timeout=60)
    response.raise_for_status()  # fail loudly if the download did not succeed
    with open(pdf_path, "wb") as f:
        f.write(response.content)
# =========================
# 3. EXTRACT TEXT
# =========================
doc = fitz.open(pdf_path)
text = ""
for page in doc:
    text += page.get_text()
doc.close()
# =========================
# 4. CHUNKING
# =========================
def chunk_text(text, chunk_size=800):
    """Greedily pack newline-separated paragraphs into chunks of roughly chunk_size characters."""
    paragraphs = text.split("\n")
    chunks = []
    current = ""
    for para in paragraphs:
        if len(current) + len(para) < chunk_size:
            current += para + "\n"
        else:
            chunks.append(current.strip())
            current = para
    if current:
        chunks.append(current.strip())
    return chunks
# Cap the corpus at 300 chunks so startup embedding stays fast.
chunks = chunk_text(text)[:300]
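# Optional sanity check (illustrative; uncomment to tune chunk_size):
# print(f"{len(chunks)} chunks, avg {sum(map(len, chunks)) / max(len(chunks), 1):.0f} chars")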
# =========================
# 5. EMBEDDINGS
# =========================
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(chunks, batch_size=32, convert_to_numpy=True)
faiss.normalize_L2(embeddings)  # unit-normalize so L2 distance ranks like cosine similarity
# =========================
# 6. FAISS
# =========================
dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(embeddings)
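# Note: with unit-normalized vectors, IndexFlatL2 ranks results identically to
# cosine similarity. An equivalent inner-product variant (a sketch) would be:
#   index = faiss.IndexFlatIP(dim)
#   index.add(embeddings)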
# =========================
# 7. RETRIEVAL
# =========================
def retrieve(query, k=4):
    q_emb = model.encode([query], convert_to_numpy=True)
    faiss.normalize_L2(q_emb)
    _, idx = index.search(q_emb, k)
    return [chunks[i] for i in idx[0]]
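# Quick smoke test (illustrative query; uncomment to verify retrieval before launching the UI):
# for c in retrieve("What is gradient descent?"):
#     print(c[:80], "...")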
# =========================
# 8. GENERATION
# =========================
def generate_answer(query):
    docs = retrieve(query)
    context = "\n\n".join(docs)
    prompt = f"""Context:
{context}

Question:
{query}"""
    try:
        res = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {
                    "role": "system",
                    "content": "Answer ONLY from the provided context. If not found, say 'I don't know'."
                },
                {"role": "user", "content": prompt}
            ],
            temperature=0,
            max_tokens=500
        )
        return res.choices[0].message.content
    except Exception as e:
        return f"Error: {str(e)}"
# =========================
# 9. GRADIO UI
# =========================
def chat(message, history):
    return generate_answer(message)
with gr.Blocks(theme=gr.themes.Soft()) as demo:  # theme belongs on Blocks, not launch()
    gr.Markdown("# 📚 RAG Chatbot (ML Book)")
    gr.Markdown("Ask questions about the *Hands-On Machine Learning* PDF")
    chatbot = gr.ChatInterface(
        fn=chat,
        chatbot=gr.Chatbot(height=400),
        textbox=gr.Textbox(placeholder="Ask a question...", container=False),
    )

demo.launch()