Aluode committed on
Commit
2b083ae
Β·
verified Β·
1 Parent(s): e276dcf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +314 -248
app.py CHANGED
@@ -9,6 +9,7 @@ Generation uses HuggingFace Inference API (free, no key required).
9
  import re
10
  import os
11
  import time
 
12
  import gradio as gr
13
  from pathlib import Path
14
 
@@ -33,7 +34,7 @@ except ImportError:
33
  DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
34
  FALLBACK_MODEL = "HuggingFaceH4/zephyr-7b-beta"
35
  MAX_TOKENS = 512
36
- MAX_HISTORY = 6
37
 
38
  DEMO_TEXT = """The ConjunctionReservoir is a document retrieval system that asks not
39
  "do these query terms appear somewhere in this chunk?" but rather
@@ -59,11 +60,13 @@ co-occurrence queries. Use threshold=0.0 to approach standard TF-IDF."""
59
  # ── Text extraction ────────────────────────────────────────────────────────────
60
 
61
  def extract_text_from_file(filepath: str) -> str:
 
62
  path = Path(filepath)
63
  ext = path.suffix.lower()
 
64
  if ext == ".pdf":
65
  if not PDF_SUPPORT:
66
- return "ERROR: PDF support not available."
67
  try:
68
  import fitz
69
  doc = fitz.open(filepath)
@@ -75,12 +78,19 @@ def extract_text_from_file(filepath: str) -> str:
75
  return "\n\n".join(p.extract_text() or "" for p in reader.pages)
76
  except Exception as e:
77
  return f"ERROR reading PDF: {e}"
78
- else:
 
79
  try:
80
  return path.read_text(encoding="utf-8", errors="replace")
81
  except Exception as e:
82
  return f"ERROR reading file: {e}"
83
 
 
 
 
 
 
 
84
 
85
  # ── LLM generation ────────────────────────────────────────────────────────────
86
 
@@ -91,14 +101,15 @@ def get_client(hf_token: str = "") -> InferenceClient:
91
 
92
  def format_messages(system: str, history: list, user_msg: str) -> list:
93
  messages = [{"role": "system", "content": system}]
94
- for turn in history[-MAX_HISTORY:]:
95
- messages.append({"role": "user", "content": turn[0]})
96
- messages.append({"role": "assistant", "content": turn[1]})
97
  messages.append({"role": "user", "content": user_msg})
98
  return messages
99
 
100
 
101
  def stream_response(client, model, messages):
 
102
  try:
103
  stream = client.chat.completions.create(
104
  model=model,
@@ -112,6 +123,7 @@ def stream_response(client, model, messages):
112
  if delta:
113
  yield delta
114
  except Exception as e:
 
115
  if model != FALLBACK_MODEL:
116
  try:
117
  stream = client.chat.completions.create(
@@ -128,7 +140,7 @@ def stream_response(client, model, messages):
128
  return
129
  except Exception:
130
  pass
131
- yield f"\n\n⚠️ Generation error: {e}\n\nTip: Add a HuggingFace token for better rate limits."
132
 
133
 
134
  # ── Retrieval helpers ─────────────────────────────────────────────────────────
@@ -150,6 +162,7 @@ def do_retrieve(retriever, query: str, threshold: float, n_chunks: int = 3):
150
  hits = retriever.retrieve(query, top_k=n_chunks, update_coverage=True)
151
  hits = [(c, s) for c, s in hits if s > 0]
152
  if not hits:
 
153
  old = retriever.conjunction_threshold
154
  retriever.conjunction_threshold = 0.0
155
  hits = retriever.retrieve(query, top_k=2, update_coverage=False)
@@ -178,295 +191,348 @@ def format_retrieval_display(hits: list, q_tokens: set, elapsed_ms: float) -> st
178
  return "\n".join(lines)
179
 
180
 
181
- # ── App state ──────────────────────────────────────────────────────────────────
182
 
183
  class AppState:
184
  def __init__(self):
185
  self.retriever = None
186
  self.doc_name = None
187
- self.llm_history = []
 
 
188
 
189
- def reset(self):
190
  self.retriever = None
191
  self.doc_name = None
192
- self.llm_history = []
 
193
 
194
  def reset_chat(self):
 
195
  self.llm_history = []
196
 
197
 
198
- def _build_index(text, name, threshold):
199
- r = ConjunctionReservoir(conjunction_threshold=float(threshold), coverage_decay=0.04)
200
- r.build_index(text, verbose=False)
201
- s = r.summary()
202
- status = (
203
- f"βœ… **{name}** loaded \n"
204
- f"{s['n_chunks']} chunks β€’ {s['n_sentences']} sentences β€’ "
205
- f"vocab {s['vocab_size']} β€’ {s['index_time_ms']:.0f}ms"
206
- )
207
- return status, r
208
 
 
 
209
 
210
- state = AppState()
211
-
212
- # ── Gradio UI ──────────────────────────────────────────────────────────────────
213
-
214
- with gr.Blocks(title="ConjunctionReservoir Document Chat") as demo:
 
 
 
 
 
 
 
 
215
 
216
- gr.Markdown("""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  # 🧠 ConjunctionReservoir Document Chat
218
  **Sentence-level conjunction retrieval** β€” terms must co-appear *in the same sentence* to score.
219
  Grounded in auditory neuroscience (Norman-Haignere 2025, Vollan 2025). Zero embeddings. Millisecond retrieval.
220
- """)
221
-
222
- with gr.Row():
223
- # ── Left column ────────────────────────────────────────────────────────
224
- with gr.Column(scale=1, min_width=300):
225
- gr.Markdown("### πŸ“„ Document")
226
-
227
- with gr.Tab("Upload File"):
228
- file_input = gr.File(
229
- label="Upload .txt or .pdf",
230
- file_types=[".txt", ".pdf", ".md"],
231
- type="filepath",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  )
233
- upload_btn = gr.Button("πŸ“₯ Load File", variant="primary")
234
 
235
- with gr.Tab("Paste Text"):
236
- text_input = gr.Textbox(
237
- label="Paste your text here",
238
- lines=8,
239
- placeholder="Paste any text...",
 
 
 
 
 
 
240
  )
241
- paste_name = gr.Textbox(label="Document name", value="pasted_text", max_lines=1)
242
- paste_btn = gr.Button("πŸ“₯ Load Text", variant="primary")
243
 
244
- with gr.Tab("Demo"):
245
- gr.Markdown("Load the built-in demo text about ConjunctionReservoir.")
246
- demo_btn = gr.Button("πŸ§ͺ Load Demo", variant="secondary")
 
 
 
247
 
248
- doc_status = gr.Markdown("*Loading demo…*")
 
 
 
249
 
250
- gr.Markdown("### βš™οΈ Settings")
251
 
252
- threshold_slider = gr.Slider(
253
- minimum=0.0, maximum=1.0, value=0.4, step=0.05,
254
- label="Conjunction threshold",
255
- info="Fraction of query terms that must co-appear in a sentence (0=TF-IDF, 1=strict AND)"
256
- )
257
 
258
- model_dropdown = gr.Dropdown(
259
- choices=[
260
- "mistralai/Mistral-7B-Instruct-v0.3",
261
- "HuggingFaceH4/zephyr-7b-beta",
262
- "microsoft/Phi-3-mini-4k-instruct",
263
- "google/gemma-2-2b-it",
264
- "Qwen/Qwen2.5-7B-Instruct",
265
- ],
266
- value=DEFAULT_MODEL,
267
- label="LLM model",
268
- info="HuggingFace Inference API (free)"
269
- )
270
 
271
- hf_token_input = gr.Textbox(
272
- label="HuggingFace token (optional)",
273
- placeholder="hf_...",
274
- type="password",
275
- info="Add for higher rate limits. Free at huggingface.co/settings/tokens"
276
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
- show_retrieval_chk = gr.Checkbox(label="Show retrieved passages", value=True)
279
- clear_btn = gr.Button("πŸ—‘οΈ Clear conversation", variant="stop", size="sm")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
- # ── Right column: chat ─────────────────────────────────────────────────
282
- with gr.Column(scale=2):
283
- gr.Markdown("### πŸ’¬ Chat")
284
 
285
- # Gradio 6: use type="messages" instead of tuple pairs
286
- chatbot = gr.Chatbot(
287
- label="",
288
- height=480,
289
- show_label=False,
290
- type="messages",
291
- )
292
 
293
- retrieval_info = gr.Markdown("")
 
 
 
294
 
295
- with gr.Row():
296
- msg_input = gr.Textbox(
297
- placeholder="Ask anything about your document…",
298
- show_label=False,
299
- scale=5,
300
- container=False,
301
- )
302
- send_btn = gr.Button("Send β–Ά", variant="primary", scale=1)
303
 
304
- gr.Markdown(
305
- "<small>**Tip:** Try queries requiring two concepts, e.g. *\"NMDA coincidence detection\"*. "
306
- "Commands: `:coverage` β€’ `:summary` β€’ `:threshold 0.7` β€’ `:help`</small>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  )
308
 
309
- # ── Callbacks ──────────────────────────────────────────────────────────────
310
-
311
- def load_file(filepath, threshold):
312
- if not filepath:
313
- return "*No file selected*", []
314
- text = extract_text_from_file(filepath)
315
- if text.startswith("ERROR"):
316
- return f"❌ {text}", []
317
- try:
318
- status, r = _build_index(text, Path(filepath).name, threshold)
319
- state.reset()
320
- state.retriever = r
321
- state.doc_name = Path(filepath).name
322
- return status, []
323
- except Exception as e:
324
- return f"❌ Error indexing: {e}", []
325
-
326
- def load_paste(text, name, threshold):
327
- if not text or not text.strip():
328
- return "*No text provided*", []
329
- try:
330
- doc_name = name or "pasted_text"
331
- status, r = _build_index(text.strip(), doc_name, threshold)
332
- state.reset()
333
- state.retriever = r
334
- state.doc_name = doc_name
335
- return status, []
336
- except Exception as e:
337
- return f"❌ Error indexing: {e}", []
338
 
339
- def load_demo(threshold):
340
- try:
341
- status, r = _build_index(DEMO_TEXT, "ConjunctionReservoir Demo", threshold)
342
- state.reset()
343
- state.retriever = r
344
- state.doc_name = "ConjunctionReservoir Demo"
345
- return status, []
346
- except Exception as e:
347
- return f"❌ {e}", []
348
 
349
- def startup_load():
350
- try:
351
- status, r = _build_index(DEMO_TEXT, "ConjunctionReservoir Demo", 0.4)
352
- state.retriever = r
353
- state.doc_name = "ConjunctionReservoir Demo"
354
- return status
355
- except Exception as e:
356
- return f"❌ Startup error: {e}"
357
 
358
- def clear_chat():
359
- state.reset_chat()
360
- return [], ""
361
 
362
- def handle_command(msg: str):
363
- cmd = msg.strip().lower()
364
- if cmd == ":coverage":
365
- if state.retriever is None:
366
- return "No document loaded.", True
367
- p = state.retriever.coverage_profile()
368
- lines = [f"**Vollan sweep coverage** (after {p['n_queries']} queries)\n",
369
- f"Mean coverage: {p['mean_coverage']:.5f}\n"]
370
- if p["most_covered"]:
371
- lines.append("**Most visited sentences:**")
372
- for sent, cov in p["most_covered"][:5]:
373
- lines.append(f"- [{cov:.3f}] {sent[:80]}…")
374
- return "\n".join(lines), True
375
- if cmd == ":summary":
376
- if state.retriever is None:
377
- return "No document loaded.", True
378
- s = state.retriever.summary()
379
- return "**Index summary**\n" + "\n".join(f"- **{k}**: {v}" for k, v in s.items()), True
380
- if cmd.startswith(":threshold "):
381
- try:
382
- val = max(0.0, min(1.0, float(cmd.split()[1])))
383
- if state.retriever:
384
- state.retriever.conjunction_threshold = val
385
- return f"βœ… Threshold set to **{val:.2f}**", True
386
- except Exception:
387
- return "Usage: `:threshold 0.5`", True
388
- if cmd == ":help":
389
- return ("**Commands:**\n"
390
- "- `:coverage` β€” Vollan sweep focus\n"
391
- "- `:summary` β€” index statistics\n"
392
- "- `:threshold N` β€” set gate (0.0–1.0)\n"
393
- "- `:help` β€” this message"), True
394
- return "", False
395
-
396
- def respond(msg, chat_history, threshold, model, hf_token, show_retrieval):
397
- if not msg or not msg.strip():
398
- yield chat_history, ""
399
- return
400
-
401
- if state.retriever is None:
402
- chat_history = chat_history + [
403
- {"role": "user", "content": msg},
404
- {"role": "assistant", "content": "⚠️ Please load a document first."}
405
- ]
406
- yield chat_history, ""
407
- return
408
-
409
- cmd_response, is_cmd = handle_command(msg)
410
- if is_cmd:
411
- chat_history = chat_history + [
412
- {"role": "user", "content": msg},
413
- {"role": "assistant", "content": cmd_response}
414
- ]
415
- yield chat_history, ""
416
- return
417
-
418
- # Retrieve
419
- q_tokens = set(re.findall(r'\b[a-zA-Z]{3,}\b', msg.lower()))
420
- t0 = time.perf_counter()
421
- hits = do_retrieve(state.retriever, msg, float(threshold))
422
- elapsed = (time.perf_counter() - t0) * 1000
423
-
424
- retrieval_display = format_retrieval_display(hits, q_tokens, elapsed) if show_retrieval else ""
425
-
426
- context_str = format_context_for_llm(hits)
427
- system = (
428
- f'You are a document assistant helping the user understand "{state.doc_name}". '
429
- f'Answer based on the provided passages. Be specific and cite text when useful. '
430
- f'If the answer is not in the passages, say so. Keep answers concise.'
431
  )
432
- user_with_context = f"Question: {msg}\n\nRelevant passages:\n\n{context_str}"
433
- messages = format_messages(system, state.llm_history, user_with_context)
434
 
435
- client = get_client(hf_token)
436
- partial = ""
437
- chat_history = chat_history + [
438
- {"role": "user", "content": msg},
439
- {"role": "assistant", "content": ""}
440
- ]
441
-
442
- for token in stream_response(client, model, messages):
443
- partial += token
444
- chat_history[-1] = {"role": "assistant", "content": partial}
445
- yield chat_history, retrieval_display
446
 
447
- state.llm_history.append((f"Question: {msg}", partial))
 
 
 
 
448
 
449
- # ── Wire events ────────────────────────────────────────────────────────────
450
 
451
- upload_btn.click(load_file, inputs=[file_input, threshold_slider], outputs=[doc_status, chatbot])
452
- paste_btn.click(load_paste, inputs=[text_input, paste_name, threshold_slider], outputs=[doc_status, chatbot])
453
- demo_btn.click(load_demo, inputs=[threshold_slider], outputs=[doc_status, chatbot])
454
- clear_btn.click(clear_chat, outputs=[chatbot, retrieval_info])
 
 
455
 
456
- send_btn.click(
457
- respond,
458
- inputs=[msg_input, chatbot, threshold_slider, model_dropdown, hf_token_input, show_retrieval_chk],
459
- outputs=[chatbot, retrieval_info],
460
- ).then(lambda: "", outputs=[msg_input])
 
461
 
462
- msg_input.submit(
463
- respond,
464
- inputs=[msg_input, chatbot, threshold_slider, model_dropdown, hf_token_input, show_retrieval_chk],
465
- outputs=[chatbot, retrieval_info],
466
- ).then(lambda: "", outputs=[msg_input])
467
 
468
- demo.load(startup_load, outputs=[doc_status])
469
 
470
 
471
  if __name__ == "__main__":
472
- demo.launch()
 
 
 
9
  import re
10
  import os
11
  import time
12
+ import json
13
  import gradio as gr
14
  from pathlib import Path
15
 
 
34
  DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
35
  FALLBACK_MODEL = "HuggingFaceH4/zephyr-7b-beta"
36
  MAX_TOKENS = 512
37
+ MAX_HISTORY = 6 # turns to keep in context
38
 
39
  DEMO_TEXT = """The ConjunctionReservoir is a document retrieval system that asks not
40
  "do these query terms appear somewhere in this chunk?" but rather
 
60
  # ── Text extraction ────────────────────────────────────────────────────────────
61
 
62
  def extract_text_from_file(filepath: str) -> str:
63
+ """Extract text from .txt or .pdf file."""
64
  path = Path(filepath)
65
  ext = path.suffix.lower()
66
+
67
  if ext == ".pdf":
68
  if not PDF_SUPPORT:
69
+ return "ERROR: PDF support not available. Please install PyMuPDF or pypdf."
70
  try:
71
  import fitz
72
  doc = fitz.open(filepath)
 
78
  return "\n\n".join(p.extract_text() or "" for p in reader.pages)
79
  except Exception as e:
80
  return f"ERROR reading PDF: {e}"
81
+
82
+ elif ext in (".txt", ".md", ".rst", ".text"):
83
  try:
84
  return path.read_text(encoding="utf-8", errors="replace")
85
  except Exception as e:
86
  return f"ERROR reading file: {e}"
87
 
88
+ else:
89
+ try:
90
+ return path.read_text(encoding="utf-8", errors="replace")
91
+ except Exception as e:
92
+ return f"ERROR: Unsupported file type {ext}. Try .txt or .pdf"
93
+
94
 
95
  # ── LLM generation ────────────────────────────────────────────────────────────
96
 
 
101
 
102
  def format_messages(system: str, history: list, user_msg: str) -> list:
103
  messages = [{"role": "system", "content": system}]
104
+ for user_h, asst_h in history[-MAX_HISTORY:]:
105
+ messages.append({"role": "user", "content": user_h})
106
+ messages.append({"role": "assistant", "content": asst_h})
107
  messages.append({"role": "user", "content": user_msg})
108
  return messages
109
 
110
 
111
  def stream_response(client, model, messages):
112
+ """Stream tokens from HF Inference API."""
113
  try:
114
  stream = client.chat.completions.create(
115
  model=model,
 
123
  if delta:
124
  yield delta
125
  except Exception as e:
126
+ # Try fallback model
127
  if model != FALLBACK_MODEL:
128
  try:
129
  stream = client.chat.completions.create(
 
140
  return
141
  except Exception:
142
  pass
143
+ yield f"\n\n⚠️ Generation error: {e}\n\nTip: Add a HuggingFace token in Settings for better rate limits."
144
 
145
 
146
  # ── Retrieval helpers ─────────────────────────────────────────────────────────
 
162
  hits = retriever.retrieve(query, top_k=n_chunks, update_coverage=True)
163
  hits = [(c, s) for c, s in hits if s > 0]
164
  if not hits:
165
+ # Loosen and retry
166
  old = retriever.conjunction_threshold
167
  retriever.conjunction_threshold = 0.0
168
  hits = retriever.retrieve(query, top_k=2, update_coverage=False)
 
191
  return "\n".join(lines)
192
 
193
 
194
+ # ── Main app state ─────────────────────────────────────────────────────────────
195
 
196
  class AppState:
197
  def __init__(self):
198
  self.retriever = None
199
  self.doc_name = None
200
+ self.doc_chars = 0
201
+ self.chat_history = [] # list of (user, assistant) for display
202
+ self.llm_history = [] # list of (user_with_context, assistant) for LLM
203
 
204
+ def reset_doc(self):
205
  self.retriever = None
206
  self.doc_name = None
207
+ self.doc_chars = 0
208
+ self.reset_chat()
209
 
210
  def reset_chat(self):
211
+ self.chat_history = []
212
  self.llm_history = []
213
 
214
 
215
+ # ── Build the Gradio UI ────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
216
 
217
+ def create_app():
218
+ state = AppState()
219
 
220
+ # Load demo immediately
221
+ def _load_demo():
222
+ state.reset_doc()
223
+ r = ConjunctionReservoir(conjunction_threshold=0.4, coverage_decay=0.04)
224
+ r.build_index(DEMO_TEXT, verbose=False)
225
+ state.retriever = r
226
+ state.doc_name = "ConjunctionReservoir Demo"
227
+ state.doc_chars = len(DEMO_TEXT)
228
+ s = r.summary()
229
+ return (
230
+ f"βœ… **{state.doc_name}** loaded \n"
231
+ f"{s['n_chunks']} chunks β€’ {s['n_sentences']} sentences β€’ vocab {s['vocab_size']}"
232
+ )
233
 
234
+ # ── Gradio layout ──────────────────────────────────────────────────────────
235
+ css = """
236
+ #doc-status { border-left: 4px solid #4CAF50; padding: 8px 12px; background: #f9f9f9; border-radius: 4px; }
237
+ #retrieval-info { font-size: 0.85em; color: #555; background: #f5f5f5; padding: 8px; border-radius: 4px; }
238
+ .setting-row { display: flex; gap: 12px; align-items: center; }
239
+ footer { display: none !important; }
240
+ """
241
+
242
+ theme = gr.themes.Soft(primary_hue="blue", neutral_hue="slate")
243
+
244
+ # Gradio 6.0 change: removed css and theme from Blocks init.
245
+ with gr.Blocks(
246
+ title="ConjunctionReservoir Document Chat",
247
+ ) as demo:
248
+
249
+ # ── Header ─────────────────────────────────────────────────────────────
250
+ gr.Markdown("""
251
  # 🧠 ConjunctionReservoir Document Chat
252
  **Sentence-level conjunction retrieval** β€” terms must co-appear *in the same sentence* to score.
253
  Grounded in auditory neuroscience (Norman-Haignere 2025, Vollan 2025). Zero embeddings. Millisecond retrieval.
254
+ """)
255
+
256
+ with gr.Row():
257
+ # ── Left column: document + settings ──────────────────────────────
258
+ with gr.Column(scale=1, min_width=300):
259
+ gr.Markdown("### πŸ“„ Document")
260
+
261
+ with gr.Tab("Upload File"):
262
+ file_input = gr.File(
263
+ label="Upload .txt or .pdf",
264
+ file_types=[".txt", ".pdf", ".md"],
265
+ type="filepath",
266
+ )
267
+ upload_btn = gr.Button("πŸ“₯ Load File", variant="primary")
268
+
269
+ with gr.Tab("Paste Text"):
270
+ text_input = gr.Textbox(
271
+ label="Paste your text here",
272
+ lines=8,
273
+ placeholder="Paste any text...",
274
+ )
275
+ paste_name = gr.Textbox(label="Document name", value="pasted_text", max_lines=1)
276
+ paste_btn = gr.Button("πŸ“₯ Load Text", variant="primary")
277
+
278
+ with gr.Tab("Demo"):
279
+ gr.Markdown("Load the built-in demo text about ConjunctionReservoir itself.")
280
+ demo_btn = gr.Button("πŸ§ͺ Load Demo", variant="secondary")
281
+
282
+ doc_status = gr.Markdown("*No document loaded*", elem_id="doc-status")
283
+
284
+ gr.Markdown("### βš™οΈ Settings")
285
+
286
+ threshold_slider = gr.Slider(
287
+ minimum=0.0, maximum=1.0, value=0.4, step=0.05,
288
+ label="Conjunction threshold",
289
+ info="Fraction of query terms that must co-appear in a sentence (0=TF-IDF, 1=strict AND)"
290
  )
 
291
 
292
+ model_dropdown = gr.Dropdown(
293
+ choices=[
294
+ "mistralai/Mistral-7B-Instruct-v0.3",
295
+ "HuggingFaceH4/zephyr-7b-beta",
296
+ "microsoft/Phi-3-mini-4k-instruct",
297
+ "google/gemma-2-2b-it",
298
+ "Qwen/Qwen2.5-7B-Instruct",
299
+ ],
300
+ value=DEFAULT_MODEL,
301
+ label="LLM model",
302
+ info="HuggingFace Inference API (free)"
303
  )
 
 
304
 
305
+ hf_token_input = gr.Textbox(
306
+ label="HuggingFace token (optional)",
307
+ placeholder="hf_...",
308
+ type="password",
309
+ info="Add for higher rate limits. Get one free at huggingface.co/settings/tokens"
310
+ )
311
 
312
+ show_retrieval_chk = gr.Checkbox(
313
+ label="Show retrieved passages",
314
+ value=True,
315
+ )
316
 
317
+ clear_btn = gr.Button("πŸ—‘οΈ Clear conversation", variant="stop", size="sm")
318
 
319
+ # ── Right column: chat ─────────────────────────────────────────────
320
+ with gr.Column(scale=2):
321
+ gr.Markdown("### πŸ’¬ Chat")
 
 
322
 
323
+ # Gradio 6.0 change: removed bubble_full_width and render_markdown
324
+ chatbot = gr.Chatbot(
325
+ label="",
326
+ height=480,
327
+ show_label=False,
328
+ )
 
 
 
 
 
 
329
 
330
+ retrieval_info = gr.Markdown("", elem_id="retrieval-info")
331
+
332
+ with gr.Row():
333
+ msg_input = gr.Textbox(
334
+ placeholder="Ask anything about your document…",
335
+ show_label=False,
336
+ scale=5,
337
+ container=False,
338
+ )
339
+ send_btn = gr.Button("Send β–Ά", variant="primary", scale=1)
340
+
341
+ gr.Markdown("""
342
+ <small>
343
+ **Tip:** Try queries that require two concepts together, e.g. *"NMDA coincidence detection"*.
344
+ Commands: type `:coverage` to see sweep focus β€’ `:summary` for index stats β€’ `:threshold 0.7` to change on-the-fly
345
+ </small>
346
+ """)
347
+
348
+ # ── Callbacks ──────────────────────────────────────────────────────────
349
+
350
+ def load_file(filepath, threshold):
351
+ if not filepath:
352
+ return "*No file selected*", state.chat_history
353
+ text = extract_text_from_file(filepath)
354
+ if text.startswith("ERROR"):
355
+ return f"❌ {text}", state.chat_history
356
+ return _index_text(text, Path(filepath).name, threshold)
357
+
358
+ def load_paste(text, name, threshold):
359
+ if not text or not text.strip():
360
+ return "*No text provided*", state.chat_history
361
+ return _index_text(text.strip(), name or "pasted_text", threshold)
362
+
363
+ def load_demo_cb(threshold):
364
+ status = _load_demo()
365
+ state.chat_history = []
366
+ state.llm_history = []
367
+ return status, []
368
 
369
+ def _index_text(text, name, threshold):
370
+ state.reset_doc()
371
+ try:
372
+ r = ConjunctionReservoir(
373
+ conjunction_threshold=float(threshold),
374
+ coverage_decay=0.04
375
+ )
376
+ r.build_index(text, verbose=False)
377
+ state.retriever = r
378
+ state.doc_name = name
379
+ state.doc_chars = len(text)
380
+ s = r.summary()
381
+ status = (
382
+ f"βœ… **{name}** loaded \n"
383
+ f"{s['n_chunks']} chunks β€’ {s['n_sentences']} sentences β€’ "
384
+ f"vocab {s['vocab_size']} β€’ {s['index_time_ms']:.0f}ms"
385
+ )
386
+ return status, []
387
+ except Exception as e:
388
+ return f"❌ Error indexing: {e}", state.chat_history
389
+
390
+ def clear_chat():
391
+ state.reset_chat()
392
+ return [], ""
393
+
394
+ def handle_command(msg: str):
395
+ """Handle special : commands. Returns (response_str, is_command)."""
396
+ cmd = msg.strip().lower()
397
+ if cmd == ":coverage":
398
+ if state.retriever is None:
399
+ return "No document loaded.", True
400
+ p = state.retriever.coverage_profile()
401
+ lines = [f"**Vollan sweep coverage** (after {p['n_queries']} queries) \n"]
402
+ lines.append(f"Mean coverage: {p['mean_coverage']:.5f} \n")
403
+ if p["most_covered"]:
404
+ lines.append("**Most visited sentences:**")
405
+ for sent, cov in p["most_covered"][:5]:
406
+ lines.append(f"- [{cov:.3f}] {sent[:80]}…")
407
+ return "\n".join(lines), True
408
+
409
+ if cmd == ":summary":
410
+ if state.retriever is None:
411
+ return "No document loaded.", True
412
+ s = state.retriever.summary()
413
+ return (
414
+ f"**Index summary** \n"
415
+ + "\n".join(f"- **{k}**: {v}" for k, v in s.items())
416
+ ), True
417
+
418
+ if cmd.startswith(":threshold "):
419
+ try:
420
+ val = float(cmd.split()[1])
421
+ val = max(0.0, min(1.0, val))
422
+ if state.retriever:
423
+ state.retriever.conjunction_threshold = val
424
+ return f"βœ… Threshold set to **{val:.2f}**", True
425
+ except Exception:
426
+ return "Usage: `:threshold 0.5`", True
427
+
428
+ if cmd == ":help":
429
+ return (
430
+ "**Commands:**\n"
431
+ "- `:coverage` β€” show Vollan sweep focus\n"
432
+ "- `:summary` β€” index statistics\n"
433
+ "- `:threshold N` β€” set conjunction gate (0.0–1.0)\n"
434
+ "- `:help` β€” this message"
435
+ ), True
436
 
437
+ return "", False
 
 
438
 
439
+ def respond(msg, chat_history, threshold, model, hf_token, show_retrieval):
440
+ if not msg or not msg.strip():
441
+ yield chat_history, ""
442
+ return
 
 
 
443
 
444
+ if state.retriever is None:
445
+ chat_history = chat_history + [(msg, "⚠️ Please load a document first.")]
446
+ yield chat_history, ""
447
+ return
448
 
449
+ # Handle commands
450
+ cmd_response, is_cmd = handle_command(msg)
451
+ if is_cmd:
452
+ chat_history = chat_history + [(msg, cmd_response)]
453
+ yield chat_history, ""
454
+ return
 
 
455
 
456
+ # Retrieve
457
+ q_tokens = set(re.findall(r'\b[a-zA-Z]{3,}\b', msg.lower()))
458
+ t0 = time.perf_counter()
459
+ hits = do_retrieve(state.retriever, msg, float(threshold))
460
+ elapsed = (time.perf_counter() - t0) * 1000
461
+
462
+ retrieval_display = ""
463
+ if show_retrieval:
464
+ retrieval_display = format_retrieval_display(hits, q_tokens, elapsed)
465
+
466
+ # Build LLM prompt
467
+ context_str = format_context_for_llm(hits)
468
+ system = (
469
+ f'You are a document assistant helping the user understand "{state.doc_name}". '
470
+ f'Answer based on the provided passages. Be specific and cite the text when useful. '
471
+ f'If the answer is not in the passages, say so clearly. Keep answers concise.'
472
+ )
473
+ user_with_context = (
474
+ f"Question: {msg}\n\n"
475
+ f"Relevant passages from the document:\n\n{context_str}"
476
  )
477
 
478
+ messages = format_messages(system, state.llm_history[-MAX_HISTORY:], user_with_context)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479
 
480
+ # Stream response
481
+ client = get_client(hf_token)
482
+ partial = ""
483
+ chat_history = chat_history + [(msg, "")]
484
+ for token in stream_response(client, model, messages):
485
+ partial += token
486
+ chat_history[-1] = (msg, partial)
487
+ yield chat_history, retrieval_display
 
488
 
489
+ # Save to history
490
+ state.llm_history.append((f"Question: {msg}", partial))
491
+ state.chat_history = chat_history
 
 
 
 
 
492
 
493
+ # ── Wire events ────────────────────────────────────────────────────────
 
 
494
 
495
+ upload_btn.click(
496
+ load_file,
497
+ inputs=[file_input, threshold_slider],
498
+ outputs=[doc_status, chatbot],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
499
  )
 
 
500
 
501
+ paste_btn.click(
502
+ load_paste,
503
+ inputs=[text_input, paste_name, threshold_slider],
504
+ outputs=[doc_status, chatbot],
505
+ )
 
 
 
 
 
 
506
 
507
+ demo_btn.click(
508
+ load_demo_cb,
509
+ inputs=[threshold_slider],
510
+ outputs=[doc_status, chatbot],
511
+ )
512
 
513
+ clear_btn.click(clear_chat, outputs=[chatbot, retrieval_info])
514
 
515
+ send_btn.click(
516
+ respond,
517
+ inputs=[msg_input, chatbot, threshold_slider, model_dropdown,
518
+ hf_token_input, show_retrieval_chk],
519
+ outputs=[chatbot, retrieval_info],
520
+ ).then(lambda: "", outputs=[msg_input])
521
 
522
+ msg_input.submit(
523
+ respond,
524
+ inputs=[msg_input, chatbot, threshold_slider, model_dropdown,
525
+ hf_token_input, show_retrieval_chk],
526
+ outputs=[chatbot, retrieval_info],
527
+ ).then(lambda: "", outputs=[msg_input])
528
 
529
+ # Load demo on startup
530
+ demo.load(_load_demo, outputs=[doc_status])
 
 
 
531
 
532
+ return demo, css, theme
533
 
534
 
535
  if __name__ == "__main__":
536
+ # Gradio 6.0 change: Pass css and theme into launch()
537
+ app, app_css, app_theme = create_app()
538
+ app.launch(share=False, css=app_css, theme=app_theme)