import streamlit as st from pdf_utils import extract_text, chunk_text from vector_store import build_index from tiny_llama import answer_query SAMPLE_PDF_PATH = "sample.pdf" SAMPLE_QUESTION = "What is this document about?" st.set_page_config(page_title="PDF Chatbot", page_icon="đ") st.title("đ Chat with your PDF (LLaMA-based)") # ---------------------- Initialize Session State ---------------------- if "messages" not in st.session_state: st.session_state.messages = [] if "index" not in st.session_state: st.session_state.index = None if "chunks" not in st.session_state: st.session_state.chunks = None if "pending_question" not in st.session_state: st.session_state.pending_question = None if "pdf_name" not in st.session_state: st.session_state.pdf_name = None if "using_sample" not in st.session_state: st.session_state.using_sample = False if "sample_processed" not in st.session_state: st.session_state.sample_processed = False # ---------------------- Sample PDF Load ---------------------- if not st.session_state.index and not st.session_state.pdf_name and not st.session_state.sample_processed: with st.spinner("Loading sample PDF and preparing demo..."): try: with open(SAMPLE_PDF_PATH, "rb") as f: text = extract_text(f) if text: chunks = chunk_text(text) index, _ = build_index(chunks) st.session_state.index = index st.session_state.chunks = chunks st.session_state.pdf_name = "Sample PDF" st.session_state.using_sample = True st.session_state.sample_processed = True st.session_state.messages = [] # Add sample question st.session_state.messages.append({ "role": "user", "content": SAMPLE_QUESTION, "is_sample": True }) # Generate actual answer from the sample PDF with st.spinner("Generating sample answer..."): answer = answer_query(SAMPLE_QUESTION, index, chunks) if not answer: answer = "I couldn't generate an answer from this document." st.session_state.messages.append({ "role": "assistant", "content": answer, "is_sample": True }) else: st.warning("Could not extract text from sample PDF.") except FileNotFoundError: st.warning("Sample PDF not found. Please upload your own.") except Exception as e: st.error(f"Error loading sample PDF: {str(e)}") # ---------------------- PDF Upload ---------------------- uploaded = st.file_uploader("Upload your PDF", type=["pdf"]) if uploaded is not None: # Reset everything if uploading a new PDF if st.session_state.pdf_name != uploaded.name: with st.spinner("Processing uploaded PDF..."): try: text = extract_text(uploaded) if text: chunks = chunk_text(text) index, _ = build_index(chunks) st.session_state.index = index st.session_state.chunks = chunks st.session_state.messages = [] st.session_state.pdf_name = uploaded.name st.session_state.using_sample = False st.success(f"Uploaded: {uploaded.name}. You can now chat!") else: st.warning("Could not extract text from uploaded PDF. It might be scanned or encrypted.") except Exception as e: st.error(f"Error processing uploaded PDF: {str(e)}") # ---------------------- Display Messages ---------------------- if st.session_state.pdf_name: st.subheader(f"Chatting with: {st.session_state.pdf_name}") for msg in st.session_state.messages: role = "đ§ You" if msg["role"] == "user" else "đ¤ Assistant" # Style sample messages differently if msg.get("is_sample", False): st.markdown(f"""