Spaces:

Aluode
/

Conjuction-Reservoir-Rag

Sleeping

App Files Community

Aluode committed 22 days ago

Commit

e276dcf

verified ·

1 Parent(s): f65b63e

Update app.py

Browse files

Files changed (1) hide show

app.py +248 -313

app.py CHANGED Viewed

@@ -9,7 +9,6 @@ Generation uses HuggingFace Inference API (free, no key required).
 import re
 import os
 import time
-import json
 import gradio as gr
 from pathlib import Path
@@ -34,7 +33,7 @@ except ImportError:
 DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
 FALLBACK_MODEL = "HuggingFaceH4/zephyr-7b-beta"
 MAX_TOKENS = 512
-MAX_HISTORY = 6  # turns to keep in context
 DEMO_TEXT = """The ConjunctionReservoir is a document retrieval system that asks not
 "do these query terms appear somewhere in this chunk?" but rather
@@ -60,13 +59,11 @@ co-occurrence queries. Use threshold=0.0 to approach standard TF-IDF."""
 # ── Text extraction ────────────────────────────────────────────────────────────
 def extract_text_from_file(filepath: str) -> str:
-    """Extract text from .txt or .pdf file."""
     path = Path(filepath)
     ext = path.suffix.lower()
     if ext == ".pdf":
         if not PDF_SUPPORT:
-            return "ERROR: PDF support not available. Please install PyMuPDF or pypdf."
         try:
             import fitz
             doc = fitz.open(filepath)
@@ -78,18 +75,11 @@ def extract_text_from_file(filepath: str) -> str:
                 return "\n\n".join(p.extract_text() or "" for p in reader.pages)
             except Exception as e:
                 return f"ERROR reading PDF: {e}"
-    elif ext in (".txt", ".md", ".rst", ".text"):
-        try:
-            return path.read_text(encoding="utf-8", errors="replace")
-        except Exception as e:
-            return f"ERROR reading file: {e}"
     else:
         try:
             return path.read_text(encoding="utf-8", errors="replace")
         except Exception as e:
-            return f"ERROR: Unsupported file type {ext}. Try .txt or .pdf"
 # ── LLM generation ────────────────────────────────────────────────────────────
@@ -101,15 +91,14 @@ def get_client(hf_token: str = "") -> InferenceClient:
 def format_messages(system: str, history: list, user_msg: str) -> list:
     messages = [{"role": "system", "content": system}]
-    for user_h, asst_h in history[-MAX_HISTORY:]:
-        messages.append({"role": "user", "content": user_h})
-        messages.append({"role": "assistant", "content": asst_h})
     messages.append({"role": "user", "content": user_msg})
     return messages
 def stream_response(client, model, messages):
-    """Stream tokens from HF Inference API."""
     try:
         stream = client.chat.completions.create(
             model=model,
@@ -123,7 +112,6 @@ def stream_response(client, model, messages):
             if delta:
                 yield delta
     except Exception as e:
-        # Try fallback model
         if model != FALLBACK_MODEL:
             try:
                 stream = client.chat.completions.create(
@@ -140,7 +128,7 @@ def stream_response(client, model, messages):
                 return
             except Exception:
                 pass
-        yield f"\n\n⚠️ Generation error: {e}\n\nTip: Add a HuggingFace token in Settings for better rate limits."
 # ── Retrieval helpers ─────────────────────────────────────────────────────────
@@ -162,7 +150,6 @@ def do_retrieve(retriever, query: str, threshold: float, n_chunks: int = 3):
     hits = retriever.retrieve(query, top_k=n_chunks, update_coverage=True)
     hits = [(c, s) for c, s in hits if s > 0]
     if not hits:
-        # Loosen and retry
         old = retriever.conjunction_threshold
         retriever.conjunction_threshold = 0.0
         hits = retriever.retrieve(query, top_k=2, update_coverage=False)
@@ -191,347 +178,295 @@ def format_retrieval_display(hits: list, q_tokens: set, elapsed_ms: float) -> st
     return "\n".join(lines)
-# ── Main app state ─────────────────────────────────────────────────────────────
 class AppState:
     def __init__(self):
         self.retriever = None
         self.doc_name = None
-        self.doc_chars = 0
-        self.chat_history = []  # list of (user, assistant) for display
-        self.llm_history = []   # list of (user_with_context, assistant) for LLM
-    def reset_doc(self):
         self.retriever = None
         self.doc_name = None
-        self.doc_chars = 0
-        self.reset_chat()
     def reset_chat(self):
-        self.chat_history = []
         self.llm_history = []
-# ── Build the Gradio UI ────────────────────────────────────────────────────────
-def create_app():
-    state = AppState()
-    # Load demo immediately
-    def _load_demo():
-        state.reset_doc()
-        r = ConjunctionReservoir(conjunction_threshold=0.4, coverage_decay=0.04)
-        r.build_index(DEMO_TEXT, verbose=False)
-        state.retriever = r
-        state.doc_name = "ConjunctionReservoir Demo"
-        state.doc_chars = len(DEMO_TEXT)
-        s = r.summary()
-        return (
-            f"✅ **{state.doc_name}** loaded  \n"
-            f"{s['n_chunks']} chunks • {s['n_sentences']} sentences • vocab {s['vocab_size']}"
-        )
-    # ── Gradio layout ──────────────────────────────────────────────────────────
-    css = """
-    #doc-status { border-left: 4px solid #4CAF50; padding: 8px 12px; background: #f9f9f9; border-radius: 4px; }
-    #retrieval-info { font-size: 0.85em; color: #555; background: #f5f5f5; padding: 8px; border-radius: 4px; }
-    .setting-row { display: flex; gap: 12px; align-items: center; }
-    footer { display: none !important; }
-    """
-    with gr.Blocks(
-        title="ConjunctionReservoir Document Chat",
-        css=css,
-        theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate"),
-    ) as demo:
-        # ── Header ─────────────────────────────────────────────────────────────
-        gr.Markdown("""
 # 🧠 ConjunctionReservoir Document Chat
 **Sentence-level conjunction retrieval** — terms must co-appear *in the same sentence* to score.
 Grounded in auditory neuroscience (Norman-Haignere 2025, Vollan 2025). Zero embeddings. Millisecond retrieval.
-        """)
-        with gr.Row():
-            # ── Left column: document + settings ──────────────────────────────
-            with gr.Column(scale=1, min_width=300):
-                gr.Markdown("### 📄 Document")
-                with gr.Tab("Upload File"):
-                    file_input = gr.File(
-                        label="Upload .txt or .pdf",
-                        file_types=[".txt", ".pdf", ".md"],
-                        type="filepath",
-                    )
-                    upload_btn = gr.Button("📥 Load File", variant="primary")
-                with gr.Tab("Paste Text"):
-                    text_input = gr.Textbox(
-                        label="Paste your text here",
-                        lines=8,
-                        placeholder="Paste any text...",
-                    )
-                    paste_name = gr.Textbox(label="Document name", value="pasted_text", max_lines=1)
-                    paste_btn = gr.Button("📥 Load Text", variant="primary")
-                with gr.Tab("Demo"):
-                    gr.Markdown("Load the built-in demo text about ConjunctionReservoir itself.")
-                    demo_btn = gr.Button("🧪 Load Demo", variant="secondary")
-                doc_status = gr.Markdown("*No document loaded*", elem_id="doc-status")
-                gr.Markdown("### ⚙️ Settings")
-                threshold_slider = gr.Slider(
-                    minimum=0.0, maximum=1.0, value=0.4, step=0.05,
-                    label="Conjunction threshold",
-                    info="Fraction of query terms that must co-appear in a sentence (0=TF-IDF, 1=strict AND)"
                 )
-                model_dropdown = gr.Dropdown(
-                    choices=[
-                        "mistralai/Mistral-7B-Instruct-v0.3",
-                        "HuggingFaceH4/zephyr-7b-beta",
-                        "microsoft/Phi-3-mini-4k-instruct",
-                        "google/gemma-2-2b-it",
-                        "Qwen/Qwen2.5-7B-Instruct",
-                    ],
-                    value=DEFAULT_MODEL,
-                    label="LLM model",
-                    info="HuggingFace Inference API (free)"
                 )
-                hf_token_input = gr.Textbox(
-                    label="HuggingFace token (optional)",
-                    placeholder="hf_...",
-                    type="password",
-                    info="Add for higher rate limits. Get one free at huggingface.co/settings/tokens"
-                )
-                show_retrieval_chk = gr.Checkbox(
-                    label="Show retrieved passages",
-                    value=True,
-                )
-                clear_btn = gr.Button("🗑️ Clear conversation", variant="stop", size="sm")
-            # ── Right column: chat ─────────────────────────────────────────────
-            with gr.Column(scale=2):
-                gr.Markdown("### 💬 Chat")
-                chatbot = gr.Chatbot(
-                    label="",
-                    height=480,
-                    show_label=False,
-                    bubble_full_width=False,
-                    render_markdown=True,
-                )
-                retrieval_info = gr.Markdown("", elem_id="retrieval-info")
-                with gr.Row():
-                    msg_input = gr.Textbox(
-                        placeholder="Ask anything about your document…",
-                        show_label=False,
-                        scale=5,
-                        container=False,
-                    )
-                    send_btn = gr.Button("Send ▶", variant="primary", scale=1)
-                gr.Markdown("""
-<small>
-**Tip:** Try queries that require two concepts together, e.g. *"NMDA coincidence detection"*.
-Commands: type `:coverage` to see sweep focus • `:summary` for index stats • `:threshold 0.7` to change on-the-fly
-</small>
-                """)
-        # ── Callbacks ──────────────────────────────────────────────────────────
-        def load_file(filepath, threshold):
-            if not filepath:
-                return "*No file selected*", state.chat_history
-            text = extract_text_from_file(filepath)
-            if text.startswith("ERROR"):
-                return f"❌ {text}", state.chat_history
-            return _index_text(text, Path(filepath).name, threshold)
-        def load_paste(text, name, threshold):
-            if not text or not text.strip():
-                return "*No text provided*", state.chat_history
-            return _index_text(text.strip(), name or "pasted_text", threshold)
-        def load_demo_cb(threshold):
-            status = _load_demo()
-            state.chat_history = []
-            state.llm_history = []
-            return status, []
-        def _index_text(text, name, threshold):
-            state.reset_doc()
-            try:
-                r = ConjunctionReservoir(
-                    conjunction_threshold=float(threshold),
-                    coverage_decay=0.04
-                )
-                r.build_index(text, verbose=False)
-                state.retriever = r
-                state.doc_name = name
-                state.doc_chars = len(text)
-                s = r.summary()
-                status = (
-                    f"✅ **{name}** loaded  \n"
-                    f"{s['n_chunks']} chunks • {s['n_sentences']} sentences • "
-                    f"vocab {s['vocab_size']} • {s['index_time_ms']:.0f}ms"
-                )
-                return status, []
-            except Exception as e:
-                return f"❌ Error indexing: {e}", state.chat_history
-        def clear_chat():
-            state.reset_chat()
-            return [], ""
-        def handle_command(msg: str):
-            """Handle special : commands. Returns (response_str, is_command)."""
-            cmd = msg.strip().lower()
-            if cmd == ":coverage":
-                if state.retriever is None:
-                    return "No document loaded.", True
-                p = state.retriever.coverage_profile()
-                lines = [f"**Vollan sweep coverage** (after {p['n_queries']} queries)  \n"]
-                lines.append(f"Mean coverage: {p['mean_coverage']:.5f}  \n")
-                if p["most_covered"]:
-                    lines.append("**Most visited sentences:**")
-                    for sent, cov in p["most_covered"][:5]:
-                        lines.append(f"- [{cov:.3f}] {sent[:80]}…")
-                return "\n".join(lines), True
-            if cmd == ":summary":
-                if state.retriever is None:
-                    return "No document loaded.", True
-                s = state.retriever.summary()
-                return (
-                    f"**Index summary**  \n"
-                    + "\n".join(f"- **{k}**: {v}" for k, v in s.items())
-                ), True
-            if cmd.startswith(":threshold "):
-                try:
-                    val = float(cmd.split()[1])
-                    val = max(0.0, min(1.0, val))
-                    if state.retriever:
-                        state.retriever.conjunction_threshold = val
-                    return f"✅ Threshold set to **{val:.2f}**", True
-                except Exception:
-                    return "Usage: `:threshold 0.5`", True
-            if cmd == ":help":
-                return (
-                    "**Commands:**\n"
-                    "- `:coverage` — show Vollan sweep focus\n"
-                    "- `:summary` — index statistics\n"
-                    "- `:threshold N` — set conjunction gate (0.0–1.0)\n"
-                    "- `:help` — this message"
-                ), True
-            return "", False
-        def respond(msg, chat_history, threshold, model, hf_token, show_retrieval):
-            if not msg or not msg.strip():
-                yield chat_history, ""
-                return
-            if state.retriever is None:
-                chat_history = chat_history + [(msg, "⚠️ Please load a document first.")]
-                yield chat_history, ""
-                return
-            # Handle commands
-            cmd_response, is_cmd = handle_command(msg)
-            if is_cmd:
-                chat_history = chat_history + [(msg, cmd_response)]
-                yield chat_history, ""
-                return
-            # Retrieve
-            q_tokens = set(re.findall(r'\b[a-zA-Z]{3,}\b', msg.lower()))
-            t0 = time.perf_counter()
-            hits = do_retrieve(state.retriever, msg, float(threshold))
-            elapsed = (time.perf_counter() - t0) * 1000
-            retrieval_display = ""
-            if show_retrieval:
-                retrieval_display = format_retrieval_display(hits, q_tokens, elapsed)
-            # Build LLM prompt
-            context_str = format_context_for_llm(hits)
-            system = (
-                f'You are a document assistant helping the user understand "{state.doc_name}". '
-                f'Answer based on the provided passages. Be specific and cite the text when useful. '
-                f'If the answer is not in the passages, say so clearly. Keep answers concise.'
-            )
-            user_with_context = (
-                f"Question: {msg}\n\n"
-                f"Relevant passages from the document:\n\n{context_str}"
             )
-            messages = format_messages(system, state.llm_history[-MAX_HISTORY:], user_with_context)
-            # Stream response
-            client = get_client(hf_token)
-            partial = ""
-            chat_history = chat_history + [(msg, "")]
-            for token in stream_response(client, model, messages):
-                partial += token
-                chat_history[-1] = (msg, partial)
-                yield chat_history, retrieval_display
-            # Save to history
-            state.llm_history.append((f"Question: {msg}", partial))
-            state.chat_history = chat_history
-        # ── Wire events ────────────────────────────────────────────────────────
-        upload_btn.click(
-            load_file,
-            inputs=[file_input, threshold_slider],
-            outputs=[doc_status, chatbot],
         )
-        paste_btn.click(
-            load_paste,
-            inputs=[text_input, paste_name, threshold_slider],
-            outputs=[doc_status, chatbot],
-        )
-        demo_btn.click(
-            load_demo_cb,
-            inputs=[threshold_slider],
-            outputs=[doc_status, chatbot],
-        )
-        clear_btn.click(clear_chat, outputs=[chatbot, retrieval_info])
-        send_btn.click(
-            respond,
-            inputs=[msg_input, chatbot, threshold_slider, model_dropdown,
-                    hf_token_input, show_retrieval_chk],
-            outputs=[chatbot, retrieval_info],
-        ).then(lambda: "", outputs=[msg_input])
-        msg_input.submit(
-            respond,
-            inputs=[msg_input, chatbot, threshold_slider, model_dropdown,
-                    hf_token_input, show_retrieval_chk],
-            outputs=[chatbot, retrieval_info],
-        ).then(lambda: "", outputs=[msg_input])
-        # Load demo on startup
-        demo.load(_load_demo, outputs=[doc_status])
-    return demo
 if __name__ == "__main__":
-    app = create_app()
-    app.launch(share=False)

 import re
 import os
 import time
 import gradio as gr
 from pathlib import Path
 DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
 FALLBACK_MODEL = "HuggingFaceH4/zephyr-7b-beta"
 MAX_TOKENS = 512
+MAX_HISTORY = 6
 DEMO_TEXT = """The ConjunctionReservoir is a document retrieval system that asks not
 "do these query terms appear somewhere in this chunk?" but rather
 # ── Text extraction ────────────────────────────────────────────────────────────
 def extract_text_from_file(filepath: str) -> str:
     path = Path(filepath)
     ext = path.suffix.lower()
     if ext == ".pdf":
         if not PDF_SUPPORT:
+            return "ERROR: PDF support not available."
         try:
             import fitz
             doc = fitz.open(filepath)
                 return "\n\n".join(p.extract_text() or "" for p in reader.pages)
             except Exception as e:
                 return f"ERROR reading PDF: {e}"
     else:
         try:
             return path.read_text(encoding="utf-8", errors="replace")
         except Exception as e:
+            return f"ERROR reading file: {e}"
 # ── LLM generation ────────────────────────────────────────────────────────────
 def format_messages(system: str, history: list, user_msg: str) -> list:
     messages = [{"role": "system", "content": system}]
+    for turn in history[-MAX_HISTORY:]:
+        messages.append({"role": "user", "content": turn[0]})
+        messages.append({"role": "assistant", "content": turn[1]})
     messages.append({"role": "user", "content": user_msg})
     return messages
 def stream_response(client, model, messages):
     try:
         stream = client.chat.completions.create(
             model=model,
             if delta:
                 yield delta
     except Exception as e:
         if model != FALLBACK_MODEL:
             try:
                 stream = client.chat.completions.create(
                 return
             except Exception:
                 pass
+        yield f"\n\n⚠️ Generation error: {e}\n\nTip: Add a HuggingFace token for better rate limits."
 # ── Retrieval helpers ─────────────────────────────────────────────────────────
     hits = retriever.retrieve(query, top_k=n_chunks, update_coverage=True)
     hits = [(c, s) for c, s in hits if s > 0]
     if not hits:
         old = retriever.conjunction_threshold
         retriever.conjunction_threshold = 0.0
         hits = retriever.retrieve(query, top_k=2, update_coverage=False)
     return "\n".join(lines)
+# ── App state ──────────────────────────────────────────────────────────────────
 class AppState:
     def __init__(self):
         self.retriever = None
         self.doc_name = None
+        self.llm_history = []
+    def reset(self):
         self.retriever = None
         self.doc_name = None
+        self.llm_history = []
     def reset_chat(self):
         self.llm_history = []
+def _build_index(text, name, threshold):
+    r = ConjunctionReservoir(conjunction_threshold=float(threshold), coverage_decay=0.04)
+    r.build_index(text, verbose=False)
+    s = r.summary()
+    status = (
+        f"✅ **{name}** loaded  \n"
+        f"{s['n_chunks']} chunks • {s['n_sentences']} sentences • "
+        f"vocab {s['vocab_size']} • {s['index_time_ms']:.0f}ms"
+    )
+    return status, r
+state = AppState()
+# ── Gradio UI ──────────────────────────────────────────────────────────────────
+with gr.Blocks(title="ConjunctionReservoir Document Chat") as demo:
+    gr.Markdown("""
 # 🧠 ConjunctionReservoir Document Chat
 **Sentence-level conjunction retrieval** — terms must co-appear *in the same sentence* to score.
 Grounded in auditory neuroscience (Norman-Haignere 2025, Vollan 2025). Zero embeddings. Millisecond retrieval.
+    """)
+    with gr.Row():
+        # ── Left column ────────────────────────────────────────────────────────
+        with gr.Column(scale=1, min_width=300):
+            gr.Markdown("### 📄 Document")
+            with gr.Tab("Upload File"):
+                file_input = gr.File(
+                    label="Upload .txt or .pdf",
+                    file_types=[".txt", ".pdf", ".md"],
+                    type="filepath",
                 )
+                upload_btn = gr.Button("📥 Load File", variant="primary")
+            with gr.Tab("Paste Text"):
+                text_input = gr.Textbox(
+                    label="Paste your text here",
+                    lines=8,
+                    placeholder="Paste any text...",
                 )
+                paste_name = gr.Textbox(label="Document name", value="pasted_text", max_lines=1)
+                paste_btn = gr.Button("📥 Load Text", variant="primary")
+            with gr.Tab("Demo"):
+                gr.Markdown("Load the built-in demo text about ConjunctionReservoir.")
+                demo_btn = gr.Button("🧪 Load Demo", variant="secondary")
+            doc_status = gr.Markdown("*Loading demo…*")
+            gr.Markdown("### ⚙️ Settings")
+            threshold_slider = gr.Slider(
+                minimum=0.0, maximum=1.0, value=0.4, step=0.05,
+                label="Conjunction threshold",
+                info="Fraction of query terms that must co-appear in a sentence (0=TF-IDF, 1=strict AND)"
+            )
+            model_dropdown = gr.Dropdown(
+                choices=[
+                    "mistralai/Mistral-7B-Instruct-v0.3",
+                    "HuggingFaceH4/zephyr-7b-beta",
+                    "microsoft/Phi-3-mini-4k-instruct",
+                    "google/gemma-2-2b-it",
+                    "Qwen/Qwen2.5-7B-Instruct",
+                ],
+                value=DEFAULT_MODEL,
+                label="LLM model",
+                info="HuggingFace Inference API (free)"
+            )
+            hf_token_input = gr.Textbox(
+                label="HuggingFace token (optional)",
+                placeholder="hf_...",
+                type="password",
+                info="Add for higher rate limits. Free at huggingface.co/settings/tokens"
+            )
+            show_retrieval_chk = gr.Checkbox(label="Show retrieved passages", value=True)
+            clear_btn = gr.Button("🗑️ Clear conversation", variant="stop", size="sm")
+        # ── Right column: chat ─────────────────────────────────────────────────
+        with gr.Column(scale=2):
+            gr.Markdown("### 💬 Chat")
+            # Gradio 6: use type="messages" instead of tuple pairs
+            chatbot = gr.Chatbot(
+                label="",
+                height=480,
+                show_label=False,
+                type="messages",
+            )
+            retrieval_info = gr.Markdown("")
+            with gr.Row():
+                msg_input = gr.Textbox(
+                    placeholder="Ask anything about your document…",
+                    show_label=False,
+                    scale=5,
+                    container=False,
+                )
+                send_btn = gr.Button("Send ▶", variant="primary", scale=1)
+            gr.Markdown(
+                "<small>**Tip:** Try queries requiring two concepts, e.g. *\"NMDA coincidence detection\"*. "
+                "Commands: `:coverage` • `:summary` • `:threshold 0.7` • `:help`</small>"
             )
+    # ── Callbacks ──────────────────────────────────────────────────────────────
+    def load_file(filepath, threshold):
+        if not filepath:
+            return "*No file selected*", []
+        text = extract_text_from_file(filepath)
+        if text.startswith("ERROR"):
+            return f"❌ {text}", []
+        try:
+            status, r = _build_index(text, Path(filepath).name, threshold)
+            state.reset()
+            state.retriever = r
+            state.doc_name = Path(filepath).name
+            return status, []
+        except Exception as e:
+            return f"❌ Error indexing: {e}", []
+    def load_paste(text, name, threshold):
+        if not text or not text.strip():
+            return "*No text provided*", []
+        try:
+            doc_name = name or "pasted_text"
+            status, r = _build_index(text.strip(), doc_name, threshold)
+            state.reset()
+            state.retriever = r
+            state.doc_name = doc_name
+            return status, []
+        except Exception as e:
+            return f"❌ Error indexing: {e}", []
+    def load_demo(threshold):
+        try:
+            status, r = _build_index(DEMO_TEXT, "ConjunctionReservoir Demo", threshold)
+            state.reset()
+            state.retriever = r
+            state.doc_name = "ConjunctionReservoir Demo"
+            return status, []
+        except Exception as e:
+            return f"❌ {e}", []
+    def startup_load():
+        try:
+            status, r = _build_index(DEMO_TEXT, "ConjunctionReservoir Demo", 0.4)
+            state.retriever = r
+            state.doc_name = "ConjunctionReservoir Demo"
+            return status
+        except Exception as e:
+            return f"❌ Startup error: {e}"
+    def clear_chat():
+        state.reset_chat()
+        return [], ""
+    def handle_command(msg: str):
+        cmd = msg.strip().lower()
+        if cmd == ":coverage":
+            if state.retriever is None:
+                return "No document loaded.", True
+            p = state.retriever.coverage_profile()
+            lines = [f"**Vollan sweep coverage** (after {p['n_queries']} queries)\n",
+                     f"Mean coverage: {p['mean_coverage']:.5f}\n"]
+            if p["most_covered"]:
+                lines.append("**Most visited sentences:**")
+                for sent, cov in p["most_covered"][:5]:
+                    lines.append(f"- [{cov:.3f}] {sent[:80]}…")
+            return "\n".join(lines), True
+        if cmd == ":summary":
+            if state.retriever is None:
+                return "No document loaded.", True
+            s = state.retriever.summary()
+            return "**Index summary**\n" + "\n".join(f"- **{k}**: {v}" for k, v in s.items()), True
+        if cmd.startswith(":threshold "):
+            try:
+                val = max(0.0, min(1.0, float(cmd.split()[1])))
+                if state.retriever:
+                    state.retriever.conjunction_threshold = val
+                return f"✅ Threshold set to **{val:.2f}**", True
+            except Exception:
+                return "Usage: `:threshold 0.5`", True
+        if cmd == ":help":
+            return ("**Commands:**\n"
+                    "- `:coverage` — Vollan sweep focus\n"
+                    "- `:summary` — index statistics\n"
+                    "- `:threshold N` — set gate (0.0–1.0)\n"
+                    "- `:help` — this message"), True
+        return "", False
+    def respond(msg, chat_history, threshold, model, hf_token, show_retrieval):
+        if not msg or not msg.strip():
+            yield chat_history, ""
+            return
+        if state.retriever is None:
+            chat_history = chat_history + [
+                {"role": "user", "content": msg},
+                {"role": "assistant", "content": "⚠️ Please load a document first."}
+            ]
+            yield chat_history, ""
+            return
+        cmd_response, is_cmd = handle_command(msg)
+        if is_cmd:
+            chat_history = chat_history + [
+                {"role": "user", "content": msg},
+                {"role": "assistant", "content": cmd_response}
+            ]
+            yield chat_history, ""
+            return
+        # Retrieve
+        q_tokens = set(re.findall(r'\b[a-zA-Z]{3,}\b', msg.lower()))
+        t0 = time.perf_counter()
+        hits = do_retrieve(state.retriever, msg, float(threshold))
+        elapsed = (time.perf_counter() - t0) * 1000
+        retrieval_display = format_retrieval_display(hits, q_tokens, elapsed) if show_retrieval else ""
+        context_str = format_context_for_llm(hits)
+        system = (
+            f'You are a document assistant helping the user understand "{state.doc_name}". '
+            f'Answer based on the provided passages. Be specific and cite text when useful. '
+            f'If the answer is not in the passages, say so. Keep answers concise.'
         )
+        user_with_context = f"Question: {msg}\n\nRelevant passages:\n\n{context_str}"
+        messages = format_messages(system, state.llm_history, user_with_context)
+        client = get_client(hf_token)
+        partial = ""
+        chat_history = chat_history + [
+            {"role": "user", "content": msg},
+            {"role": "assistant", "content": ""}
+        ]
+        for token in stream_response(client, model, messages):
+            partial += token
+            chat_history[-1] = {"role": "assistant", "content": partial}
+            yield chat_history, retrieval_display
+        state.llm_history.append((f"Question: {msg}", partial))
+    # ── Wire events ────────────────────────────────────────────────────────────
+    upload_btn.click(load_file, inputs=[file_input, threshold_slider], outputs=[doc_status, chatbot])
+    paste_btn.click(load_paste, inputs=[text_input, paste_name, threshold_slider], outputs=[doc_status, chatbot])
+    demo_btn.click(load_demo, inputs=[threshold_slider], outputs=[doc_status, chatbot])
+    clear_btn.click(clear_chat, outputs=[chatbot, retrieval_info])
+    send_btn.click(
+        respond,
+        inputs=[msg_input, chatbot, threshold_slider, model_dropdown, hf_token_input, show_retrieval_chk],
+        outputs=[chatbot, retrieval_info],
+    ).then(lambda: "", outputs=[msg_input])
+    msg_input.submit(
+        respond,
+        inputs=[msg_input, chatbot, threshold_slider, model_dropdown, hf_token_input, show_retrieval_chk],
+        outputs=[chatbot, retrieval_info],
+    ).then(lambda: "", outputs=[msg_input])
+    demo.load(startup_load, outputs=[doc_status])
 if __name__ == "__main__":
+    demo.launch()