Aluode committed on
Commit
2b083ae
Β·
verified Β·
1 Parent(s): e276dcf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +314 -248
app.py CHANGED
@@ -9,6 +9,7 @@ Generation uses HuggingFace Inference API (free, no key required).
9
  import re
10
  import os
11
  import time
 
12
  import gradio as gr
13
  from pathlib import Path
14
 
@@ -33,7 +34,7 @@ except ImportError:
33
  DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
34
  FALLBACK_MODEL = "HuggingFaceH4/zephyr-7b-beta"
35
  MAX_TOKENS = 512
36
- MAX_HISTORY = 6
37
 
38
  DEMO_TEXT = """The ConjunctionReservoir is a document retrieval system that asks not
39
  "do these query terms appear somewhere in this chunk?" but rather
@@ -59,11 +60,13 @@ co-occurrence queries. Use threshold=0.0 to approach standard TF-IDF."""
59
  # ── Text extraction ────────────────────────────────────────────────────────────
60
 
61
  def extract_text_from_file(filepath: str) -> str:
 
62
  path = Path(filepath)
63
  ext = path.suffix.lower()
 
64
  if ext == ".pdf":
65
  if not PDF_SUPPORT:
66
- return "ERROR: PDF support not available."
67
  try:
68
  import fitz
69
  doc = fitz.open(filepath)
@@ -75,12 +78,19 @@ def extract_text_from_file(filepath: str) -> str:
75
  return "\n\n".join(p.extract_text() or "" for p in reader.pages)
76
  except Exception as e:
77
  return f"ERROR reading PDF: {e}"
78
- else:
 
79
  try:
80
  return path.read_text(encoding="utf-8", errors="replace")
81
  except Exception as e:
82
  return f"ERROR reading file: {e}"
83
 
 
 
 
 
 
 
84
 
85
  # ── LLM generation ────────────────────────────────────────────────────────────
86
 
@@ -91,14 +101,15 @@ def get_client(hf_token: str = "") -> InferenceClient:
91
 
92
  def format_messages(system: str, history: list, user_msg: str) -> list:
93
  messages = [{"role": "system", "content": system}]
94
- for turn in history[-MAX_HISTORY:]:
95
- messages.append({"role": "user", "content": turn[0]})
96
- messages.append({"role": "assistant", "content": turn[1]})
97
  messages.append({"role": "user", "content": user_msg})
98
  return messages
99
 
100
 
101
  def stream_response(client, model, messages):
 
102
  try:
103
  stream = client.chat.completions.create(
104
  model=model,
@@ -112,6 +123,7 @@ def stream_response(client, model, messages):
112
  if delta:
113
  yield delta
114
  except Exception as e:
 
115
  if model != FALLBACK_MODEL:
116
  try:
117
  stream = client.chat.completions.create(
@@ -128,7 +140,7 @@ def stream_response(client, model, messages):
128
  return
129
  except Exception:
130
  pass
131
- yield f"\n\n⚠️ Generation error: {e}\n\nTip: Add a HuggingFace token for better rate limits."
132
 
133
 
134
  # ── Retrieval helpers ─────────────────────────────────────────────────────────
@@ -150,6 +162,7 @@ def do_retrieve(retriever, query: str, threshold: float, n_chunks: int = 3):
150
  hits = retriever.retrieve(query, top_k=n_chunks, update_coverage=True)
151
  hits = [(c, s) for c, s in hits if s > 0]
152
  if not hits:
 
153
  old = retriever.conjunction_threshold
154
  retriever.conjunction_threshold = 0.0
155
  hits = retriever.retrieve(query, top_k=2, update_coverage=False)
@@ -178,295 +191,348 @@ def format_retrieval_display(hits: list, q_tokens: set, elapsed_ms: float) -> st
178
  return "\n".join(lines)
179
 
180
 
181
- # ── App state ──────────────────────────────────────────────────────────────────
182
 
183
  class AppState:
184
  def __init__(self):
185
  self.retriever = None
186
  self.doc_name = None
187
- self.llm_history = []
 
 
188
 
189
- def reset(self):
190
  self.retriever = None
191
  self.doc_name = None
192
- self.llm_history = []
 
193
 
194
  def reset_chat(self):
 
195
  self.llm_history = []
196
 
197
 
198
- def _build_index(text, name, threshold):
199
- r = ConjunctionReservoir(conjunction_threshold=float(threshold), coverage_decay=0.04)
200
- r.build_index(text, verbose=False)
201
- s = r.summary()
202
- status = (
203
- f"βœ… **{name}** loaded \n"
204
- f"{s['n_chunks']} chunks β€’ {s['n_sentences']} sentences β€’ "
205
- f"vocab {s['vocab_size']} β€’ {s['index_time_ms']:.0f}ms"
206
- )
207
- return status, r
208
 
 
 
209
 
210
- state = AppState()
211
-
212
- # ── Gradio UI ──────────────────────────────────────────────────────────────────
213
-
214
- with gr.Blocks(title="ConjunctionReservoir Document Chat") as demo:
 
 
 
 
 
 
 
 
215
 
216
- gr.Markdown("""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  # 🧠 ConjunctionReservoir Document Chat
218
  **Sentence-level conjunction retrieval** β€” terms must co-appear *in the same sentence* to score.
219
  Grounded in auditory neuroscience (Norman-Haignere 2025, Vollan 2025). Zero embeddings. Millisecond retrieval.
220
- """)
221
-
222
- with gr.Row():
223
- # ── Left column ────────────────────────────────────────────────────────
224
- with gr.Column(scale=1, min_width=300):
225
- gr.Markdown("### πŸ“„ Document")
226
-
227
- with gr.Tab("Upload File"):
228
- file_input = gr.File(
229
- label="Upload .txt or .pdf",
230
- file_types=[".txt", ".pdf", ".md"],
231
- type="filepath",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  )
233
- upload_btn = gr.Button("πŸ“₯ Load File", variant="primary")
234
 
235
- with gr.Tab("Paste Text"):
236
- text_input = gr.Textbox(
237
- label="Paste your text here",
238
- lines=8,
239
- placeholder="Paste any text...",
 
 
 
 
 
 
240
  )
241
- paste_name = gr.Textbox(label="Document name", value="pasted_text", max_lines=1)
242
- paste_btn = gr.Button("πŸ“₯ Load Text", variant="primary")
243
 
244
- with gr.Tab("Demo"):
245
- gr.Markdown("Load the built-in demo text about ConjunctionReservoir.")
246
- demo_btn = gr.Button("πŸ§ͺ Load Demo", variant="secondary")
 
 
 
247
 
248
- doc_status = gr.Markdown("*Loading demo…*")
 
 
 
249
 
250
- gr.Markdown("### βš™οΈ Settings")
251
 
252
- threshold_slider = gr.Slider(
253
- minimum=0.0, maximum=1.0, value=0.4, step=0.05,
254
- label="Conjunction threshold",
255
- info="Fraction of query terms that must co-appear in a sentence (0=TF-IDF, 1=strict AND)"
256
- )
257
 
258
- model_dropdown = gr.Dropdown(
259
- choices=[
260
- "mistralai/Mistral-7B-Instruct-v0.3",
261
- "HuggingFaceH4/zephyr-7b-beta",
262
- "microsoft/Phi-3-mini-4k-instruct",
263
- "google/gemma-2-2b-it",
264
- "Qwen/Qwen2.5-7B-Instruct",
265
- ],
266
- value=DEFAULT_MODEL,
267
- label="LLM model",
268
- info="HuggingFace Inference API (free)"
269
- )
270
 
271
- hf_token_input = gr.Textbox(
272
- label="HuggingFace token (optional)",
273
- placeholder="hf_...",
274
- type="password",
275
- info="Add for higher rate limits. Free at huggingface.co/settings/tokens"
276
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
- show_retrieval_chk = gr.Checkbox(label="Show retrieved passages", value=True)
279
- clear_btn = gr.Button("πŸ—‘οΈ Clear conversation", variant="stop", size="sm")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
- # ── Right column: chat ─────────────────────────────────────────────────
282
- with gr.Column(scale=2):
283
- gr.Markdown("### πŸ’¬ Chat")
284
 
285
- # Gradio 6: use type="messages" instead of tuple pairs
286
- chatbot = gr.Chatbot(
287
- label="",
288
- height=480,
289
- show_label=False,
290
- type="messages",
291
- )
292
 
293
- retrieval_info = gr.Markdown("")
 
 
 
294
 
295
- with gr.Row():
296
- msg_input = gr.Textbox(
297
- placeholder="Ask anything about your document…",
298
- show_label=False,
299
- scale=5,
300
- container=False,
301
- )
302
- send_btn = gr.Button("Send β–Ά", variant="primary", scale=1)
303
 
304
- gr.Markdown(
305
- "<small>**Tip:** Try queries requiring two concepts, e.g. *\"NMDA coincidence detection\"*. "
306
- "Commands: `:coverage` β€’ `:summary` β€’ `:threshold 0.7` β€’ `:help`</small>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  )
308
 
309
- # ── Callbacks ──────────────────────────────────────────────────────────────
310
-
311
- def load_file(filepath, threshold):
312
- if not filepath:
313
- return "*No file selected*", []
314
- text = extract_text_from_file(filepath)
315
- if text.startswith("ERROR"):
316
- return f"❌ {text}", []
317
- try:
318
- status, r = _build_index(text, Path(filepath).name, threshold)
319
- state.reset()
320
- state.retriever = r
321
- state.doc_name = Path(filepath).name
322
- return status, []
323
- except Exception as e:
324
- return f"❌ Error indexing: {e}", []
325
-
326
- def load_paste(text, name, threshold):
327
- if not text or not text.strip():
328
- return "*No text provided*", []
329
- try:
330
- doc_name = name or "pasted_text"
331
- status, r = _build_index(text.strip(), doc_name, threshold)
332
- state.reset()
333
- state.retriever = r
334
- state.doc_name = doc_name
335
- return status, []
336
- except Exception as e:
337
- return f"❌ Error indexing: {e}", []
338
 
339
- def load_demo(threshold):
340
- try:
341
- status, r = _build_index(DEMO_TEXT, "ConjunctionReservoir Demo", threshold)
342
- state.reset()
343
- state.retriever = r
344
- state.doc_name = "ConjunctionReservoir Demo"
345
- return status, []
346
- except Exception as e:
347
- return f"❌ {e}", []
348
 
349
- def startup_load():
350
- try:
351
- status, r = _build_index(DEMO_TEXT, "ConjunctionReservoir Demo", 0.4)
352
- state.retriever = r
353
- state.doc_name = "ConjunctionReservoir Demo"
354
- return status
355
- except Exception as e:
356
- return f"❌ Startup error: {e}"
357
 
358
- def clear_chat():
359
- state.reset_chat()
360
- return [], ""
361
 
362
- def handle_command(msg: str):
363
- cmd = msg.strip().lower()
364
- if cmd == ":coverage":
365
- if state.retriever is None:
366
- return "No document loaded.", True
367
- p = state.retriever.coverage_profile()
368
- lines = [f"**Vollan sweep coverage** (after {p['n_queries']} queries)\n",
369
- f"Mean coverage: {p['mean_coverage']:.5f}\n"]
370
- if p["most_covered"]:
371
- lines.append("**Most visited sentences:**")
372
- for sent, cov in p["most_covered"][:5]:
373
- lines.append(f"- [{cov:.3f}] {sent[:80]}…")
374
- return "\n".join(lines), True
375
- if cmd == ":summary":
376
- if state.retriever is None:
377
- return "No document loaded.", True
378
- s = state.retriever.summary()
379
- return "**Index summary**\n" + "\n".join(f"- **{k}**: {v}" for k, v in s.items()), True
380
- if cmd.startswith(":threshold "):
381
- try:
382
- val = max(0.0, min(1.0, float(cmd.split()[1])))
383
- if state.retriever:
384
- state.retriever.conjunction_threshold = val
385
- return f"βœ… Threshold set to **{val:.2f}**", True
386
- except Exception:
387
- return "Usage: `:threshold 0.5`", True
388
- if cmd == ":help":
389
- return ("**Commands:**\n"
390
- "- `:coverage` β€” Vollan sweep focus\n"
391
- "- `:summary` β€” index statistics\n"
392
- "- `:threshold N` β€” set gate (0.0–1.0)\n"
393
- "- `:help` β€” this message"), True
394
- return "", False
395
-
396
- def respond(msg, chat_history, threshold, model, hf_token, show_retrieval):
397
- if not msg or not msg.strip():
398
- yield chat_history, ""
399
- return
400
-
401
- if state.retriever is None:
402
- chat_history = chat_history + [
403
- {"role": "user", "content": msg},
404
- {"role": "assistant", "content": "⚠️ Please load a document first."}
405
- ]
406
- yield chat_history, ""
407
- return
408
-
409
- cmd_response, is_cmd = handle_command(msg)
410
- if is_cmd:
411
- chat_history = chat_history + [
412
- {"role": "user", "content": msg},
413
- {"role": "assistant", "content": cmd_response}
414
- ]
415
- yield chat_history, ""
416
- return
417
-
418
- # Retrieve
419
- q_tokens = set(re.findall(r'\b[a-zA-Z]{3,}\b', msg.lower()))
420
- t0 = time.perf_counter()
421
- hits = do_retrieve(state.retriever, msg, float(threshold))
422
- elapsed = (time.perf_counter() - t0) * 1000
423
-
424
- retrieval_display = format_retrieval_display(hits, q_tokens, elapsed) if show_retrieval else ""
425
-
426
- context_str = format_context_for_llm(hits)
427
- system = (
428
- f'You are a document assistant helping the user understand "{state.doc_name}". '
429
- f'Answer based on the provided passages. Be specific and cite text when useful. '
430
- f'If the answer is not in the passages, say so. Keep answers concise.'
431
  )
432
- user_with_context = f"Question: {msg}\n\nRelevant passages:\n\n{context_str}"
433
- messages = format_messages(system, state.llm_history, user_with_context)
434
 
435
- client = get_client(hf_token)
436
- partial = ""
437
- chat_history = chat_history + [
438
- {"role": "user", "content": msg},
439
- {"role": "assistant", "content": ""}
440
- ]
441
-
442
- for token in stream_response(client, model, messages):
443
- partial += token
444
- chat_history[-1] = {"role": "assistant", "content": partial}
445
- yield chat_history, retrieval_display
446
 
447
- state.llm_history.append((f"Question: {msg}", partial))
 
 
 
 
448
 
449
- # ── Wire events ────────────────────────────────────────────────────────────
450
 
451
- upload_btn.click(load_file, inputs=[file_input, threshold_slider], outputs=[doc_status, chatbot])
452
- paste_btn.click(load_paste, inputs=[text_input, paste_name, threshold_slider], outputs=[doc_status, chatbot])
453
- demo_btn.click(load_demo, inputs=[threshold_slider], outputs=[doc_status, chatbot])
454
- clear_btn.click(clear_chat, outputs=[chatbot, retrieval_info])
 
 
455
 
456
- send_btn.click(
457
- respond,
458
- inputs=[msg_input, chatbot, threshold_slider, model_dropdown, hf_token_input, show_retrieval_chk],
459
- outputs=[chatbot, retrieval_info],
460
- ).then(lambda: "", outputs=[msg_input])
 
461
 
462
- msg_input.submit(
463
- respond,
464
- inputs=[msg_input, chatbot, threshold_slider, model_dropdown, hf_token_input, show_retrieval_chk],
465
- outputs=[chatbot, retrieval_info],
466
- ).then(lambda: "", outputs=[msg_input])
467
 
468
- demo.load(startup_load, outputs=[doc_status])
469
 
470
 
471
  if __name__ == "__main__":
472
- demo.launch()
 
 
 
9
  import re
10
  import os
11
  import time
12
+ import json
13
  import gradio as gr
14
  from pathlib import Path
15
 
 
34
  DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
35
  FALLBACK_MODEL = "HuggingFaceH4/zephyr-7b-beta"
36
  MAX_TOKENS = 512
37
+ MAX_HISTORY = 6 # turns to keep in context
38
 
39
  DEMO_TEXT = """The ConjunctionReservoir is a document retrieval system that asks not
40
  "do these query terms appear somewhere in this chunk?" but rather
 
60
  # ── Text extraction ────────────────────────────────────────────────────────────
61
 
62
  def extract_text_from_file(filepath: str) -> str:
63
+ """Extract text from .txt or .pdf file."""
64
  path = Path(filepath)
65
  ext = path.suffix.lower()
66
+
67
  if ext == ".pdf":
68
  if not PDF_SUPPORT:
69
+ return "ERROR: PDF support not available. Please install PyMuPDF or pypdf."
70
  try:
71
  import fitz
72
  doc = fitz.open(filepath)
 
78
  return "\n\n".join(p.extract_text() or "" for p in reader.pages)
79
  except Exception as e:
80
  return f"ERROR reading PDF: {e}"
81
+
82
+ elif ext in (".txt", ".md", ".rst", ".text"):
83
  try:
84
  return path.read_text(encoding="utf-8", errors="replace")
85
  except Exception as e:
86
  return f"ERROR reading file: {e}"
87
 
88
+ else:
89
+ try:
90
+ return path.read_text(encoding="utf-8", errors="replace")
91
+ except Exception as e:
92
+ return f"ERROR: Unsupported file type {ext}. Try .txt or .pdf"
93
+
94
 
95
  # ── LLM generation ────────────────────────────────────────────────────────────
96
 
 
101
 
102
  def format_messages(system: str, history: list, user_msg: str) -> list:
103
  messages = [{"role": "system", "content": system}]
104
+ for user_h, asst_h in history[-MAX_HISTORY:]:
105
+ messages.append({"role": "user", "content": user_h})
106
+ messages.append({"role": "assistant", "content": asst_h})
107
  messages.append({"role": "user", "content": user_msg})
108
  return messages
109
 
110
 
111
  def stream_response(client, model, messages):
112
+ """Stream tokens from HF Inference API."""
113
  try:
114
  stream = client.chat.completions.create(
115
  model=model,
 
123
  if delta:
124
  yield delta
125
  except Exception as e:
126
+ # Try fallback model
127
  if model != FALLBACK_MODEL:
128
  try:
129
  stream = client.chat.completions.create(
 
140
  return
141
  except Exception:
142
  pass
143
+ yield f"\n\n⚠️ Generation error: {e}\n\nTip: Add a HuggingFace token in Settings for better rate limits."
144
 
145
 
146
  # ── Retrieval helpers ─────────────────────────────────────────────────────────
 
162
  hits = retriever.retrieve(query, top_k=n_chunks, update_coverage=True)
163
  hits = [(c, s) for c, s in hits if s > 0]
164
  if not hits:
165
+ # Loosen and retry
166
  old = retriever.conjunction_threshold
167
  retriever.conjunction_threshold = 0.0
168
  hits = retriever.retrieve(query, top_k=2, update_coverage=False)
 
191
  return "\n".join(lines)
192
 
193
 
194
+ # ── Main app state ─────────────────────────────────────────────────────────────
195
 
196
  class AppState:
197
  def __init__(self):
198
  self.retriever = None
199
  self.doc_name = None
200
+ self.doc_chars = 0
201
+ self.chat_history = [] # list of (user, assistant) for display
202
+ self.llm_history = [] # list of (user_with_context, assistant) for LLM
203
 
204
+ def reset_doc(self):
205
  self.retriever = None
206
  self.doc_name = None
207
+ self.doc_chars = 0
208
+ self.reset_chat()
209
 
210
  def reset_chat(self):
211
+ self.chat_history = []
212
  self.llm_history = []
213
 
214
 
215
+ # ── Build the Gradio UI ────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
216
 
217
+ def create_app():
218
+ state = AppState()
219
 
220
+ # Load demo immediately
221
+ def _load_demo():
222
+ state.reset_doc()
223
+ r = ConjunctionReservoir(conjunction_threshold=0.4, coverage_decay=0.04)
224
+ r.build_index(DEMO_TEXT, verbose=False)
225
+ state.retriever = r
226
+ state.doc_name = "ConjunctionReservoir Demo"
227
+ state.doc_chars = len(DEMO_TEXT)
228
+ s = r.summary()
229
+ return (
230
+ f"βœ… **{state.doc_name}** loaded \n"
231
+ f"{s['n_chunks']} chunks β€’ {s['n_sentences']} sentences β€’ vocab {s['vocab_size']}"
232
+ )
233
 
234
+ # ── Gradio layout ──────────────────────────────────────────────────────────
235
+ css = """
236
+ #doc-status { border-left: 4px solid #4CAF50; padding: 8px 12px; background: #f9f9f9; border-radius: 4px; }
237
+ #retrieval-info { font-size: 0.85em; color: #555; background: #f5f5f5; padding: 8px; border-radius: 4px; }
238
+ .setting-row { display: flex; gap: 12px; align-items: center; }
239
+ footer { display: none !important; }
240
+ """
241
+
242
+ theme = gr.themes.Soft(primary_hue="blue", neutral_hue="slate")
243
+
244
+ # Gradio 6.0 change: removed css and theme from Blocks init.
245
+ with gr.Blocks(
246
+ title="ConjunctionReservoir Document Chat",
247
+ ) as demo:
248
+
249
+ # ── Header ─────────────────────────────────────────────────────────────
250
+ gr.Markdown("""
251
  # 🧠 ConjunctionReservoir Document Chat
252
  **Sentence-level conjunction retrieval** β€” terms must co-appear *in the same sentence* to score.
253
  Grounded in auditory neuroscience (Norman-Haignere 2025, Vollan 2025). Zero embeddings. Millisecond retrieval.
254
+ """)
255
+
256
+ with gr.Row():
257
+ # ── Left column: document + settings ──────────────────────────────
258
+ with gr.Column(scale=1, min_width=300):
259
+ gr.Markdown("### πŸ“„ Document")
260
+
261
+ with gr.Tab("Upload File"):
262
+ file_input = gr.File(
263
+ label="Upload .txt or .pdf",
264
+ file_types=[".txt", ".pdf", ".md"],
265
+ type="filepath",
266
+ )
267
+ upload_btn = gr.Button("πŸ“₯ Load File", variant="primary")
268
+
269
+ with gr.Tab("Paste Text"):
270
+ text_input = gr.Textbox(
271
+ label="Paste your text here",
272
+ lines=8,
273
+ placeholder="Paste any text...",
274
+ )
275
+ paste_name = gr.Textbox(label="Document name", value="pasted_text", max_lines=1)
276
+ paste_btn = gr.Button("πŸ“₯ Load Text", variant="primary")
277
+
278
+ with gr.Tab("Demo"):
279
+ gr.Markdown("Load the built-in demo text about ConjunctionReservoir itself.")
280
+ demo_btn = gr.Button("πŸ§ͺ Load Demo", variant="secondary")
281
+
282
+ doc_status = gr.Markdown("*No document loaded*", elem_id="doc-status")
283
+
284
+ gr.Markdown("### βš™οΈ Settings")
285
+
286
+ threshold_slider = gr.Slider(
287
+ minimum=0.0, maximum=1.0, value=0.4, step=0.05,
288
+ label="Conjunction threshold",
289
+ info="Fraction of query terms that must co-appear in a sentence (0=TF-IDF, 1=strict AND)"
290
  )
 
291
 
292
+ model_dropdown = gr.Dropdown(
293
+ choices=[
294
+ "mistralai/Mistral-7B-Instruct-v0.3",
295
+ "HuggingFaceH4/zephyr-7b-beta",
296
+ "microsoft/Phi-3-mini-4k-instruct",
297
+ "google/gemma-2-2b-it",
298
+ "Qwen/Qwen2.5-7B-Instruct",
299
+ ],
300
+ value=DEFAULT_MODEL,
301
+ label="LLM model",
302
+ info="HuggingFace Inference API (free)"
303
  )
 
 
304
 
305
+ hf_token_input = gr.Textbox(
306
+ label="HuggingFace token (optional)",
307
+ placeholder="hf_...",
308
+ type="password",
309
+ info="Add for higher rate limits. Get one free at huggingface.co/settings/tokens"
310
+ )
311
 
312
+ show_retrieval_chk = gr.Checkbox(
313
+ label="Show retrieved passages",
314
+ value=True,
315
+ )
316
 
317
+ clear_btn = gr.Button("πŸ—‘οΈ Clear conversation", variant="stop", size="sm")
318
 
319
+ # ── Right column: chat ─────────────────────────────────────────────
320
+ with gr.Column(scale=2):
321
+ gr.Markdown("### πŸ’¬ Chat")
 
 
322
 
323
+ # Gradio 6.0 change: removed bubble_full_width and render_markdown
324
+ chatbot = gr.Chatbot(
325
+ label="",
326
+ height=480,
327
+ show_label=False,
328
+ )
 
 
 
 
 
 
329
 
330
+ retrieval_info = gr.Markdown("", elem_id="retrieval-info")
331
+
332
+ with gr.Row():
333
+ msg_input = gr.Textbox(
334
+ placeholder="Ask anything about your document…",
335
+ show_label=False,
336
+ scale=5,
337
+ container=False,
338
+ )
339
+ send_btn = gr.Button("Send β–Ά", variant="primary", scale=1)
340
+
341
+ gr.Markdown("""
342
+ <small>
343
+ **Tip:** Try queries that require two concepts together, e.g. *"NMDA coincidence detection"*.
344
+ Commands: type `:coverage` to see sweep focus β€’ `:summary` for index stats β€’ `:threshold 0.7` to change on-the-fly
345
+ </small>
346
+ """)
347
+
348
+ # ── Callbacks ──────────────────────────────────────────────────────────
349
+
350
+ def load_file(filepath, threshold):
351
+ if not filepath:
352
+ return "*No file selected*", state.chat_history
353
+ text = extract_text_from_file(filepath)
354
+ if text.startswith("ERROR"):
355
+ return f"❌ {text}", state.chat_history
356
+ return _index_text(text, Path(filepath).name, threshold)
357
+
358
+ def load_paste(text, name, threshold):
359
+ if not text or not text.strip():
360
+ return "*No text provided*", state.chat_history
361
+ return _index_text(text.strip(), name or "pasted_text", threshold)
362
+
363
+ def load_demo_cb(threshold):
364
+ status = _load_demo()
365
+ state.chat_history = []
366
+ state.llm_history = []
367
+ return status, []
368
 
369
+ def _index_text(text, name, threshold):
370
+ state.reset_doc()
371
+ try:
372
+ r = ConjunctionReservoir(
373
+ conjunction_threshold=float(threshold),
374
+ coverage_decay=0.04
375
+ )
376
+ r.build_index(text, verbose=False)
377
+ state.retriever = r
378
+ state.doc_name = name
379
+ state.doc_chars = len(text)
380
+ s = r.summary()
381
+ status = (
382
+ f"βœ… **{name}** loaded \n"
383
+ f"{s['n_chunks']} chunks β€’ {s['n_sentences']} sentences β€’ "
384
+ f"vocab {s['vocab_size']} β€’ {s['index_time_ms']:.0f}ms"
385
+ )
386
+ return status, []
387
+ except Exception as e:
388
+ return f"❌ Error indexing: {e}", state.chat_history
389
+
390
+ def clear_chat():
391
+ state.reset_chat()
392
+ return [], ""
393
+
394
+ def handle_command(msg: str):
395
+ """Handle special : commands. Returns (response_str, is_command)."""
396
+ cmd = msg.strip().lower()
397
+ if cmd == ":coverage":
398
+ if state.retriever is None:
399
+ return "No document loaded.", True
400
+ p = state.retriever.coverage_profile()
401
+ lines = [f"**Vollan sweep coverage** (after {p['n_queries']} queries) \n"]
402
+ lines.append(f"Mean coverage: {p['mean_coverage']:.5f} \n")
403
+ if p["most_covered"]:
404
+ lines.append("**Most visited sentences:**")
405
+ for sent, cov in p["most_covered"][:5]:
406
+ lines.append(f"- [{cov:.3f}] {sent[:80]}…")
407
+ return "\n".join(lines), True
408
+
409
+ if cmd == ":summary":
410
+ if state.retriever is None:
411
+ return "No document loaded.", True
412
+ s = state.retriever.summary()
413
+ return (
414
+ f"**Index summary** \n"
415
+ + "\n".join(f"- **{k}**: {v}" for k, v in s.items())
416
+ ), True
417
+
418
+ if cmd.startswith(":threshold "):
419
+ try:
420
+ val = float(cmd.split()[1])
421
+ val = max(0.0, min(1.0, val))
422
+ if state.retriever:
423
+ state.retriever.conjunction_threshold = val
424
+ return f"βœ… Threshold set to **{val:.2f}**", True
425
+ except Exception:
426
+ return "Usage: `:threshold 0.5`", True
427
+
428
+ if cmd == ":help":
429
+ return (
430
+ "**Commands:**\n"
431
+ "- `:coverage` β€” show Vollan sweep focus\n"
432
+ "- `:summary` β€” index statistics\n"
433
+ "- `:threshold N` β€” set conjunction gate (0.0–1.0)\n"
434
+ "- `:help` β€” this message"
435
+ ), True
436
 
437
+ return "", False
 
 
438
 
439
+ def respond(msg, chat_history, threshold, model, hf_token, show_retrieval):
440
+ if not msg or not msg.strip():
441
+ yield chat_history, ""
442
+ return
 
 
 
443
 
444
+ if state.retriever is None:
445
+ chat_history = chat_history + [(msg, "⚠️ Please load a document first.")]
446
+ yield chat_history, ""
447
+ return
448
 
449
+ # Handle commands
450
+ cmd_response, is_cmd = handle_command(msg)
451
+ if is_cmd:
452
+ chat_history = chat_history + [(msg, cmd_response)]
453
+ yield chat_history, ""
454
+ return
 
 
455
 
456
+ # Retrieve
457
+ q_tokens = set(re.findall(r'\b[a-zA-Z]{3,}\b', msg.lower()))
458
+ t0 = time.perf_counter()
459
+ hits = do_retrieve(state.retriever, msg, float(threshold))
460
+ elapsed = (time.perf_counter() - t0) * 1000
461
+
462
+ retrieval_display = ""
463
+ if show_retrieval:
464
+ retrieval_display = format_retrieval_display(hits, q_tokens, elapsed)
465
+
466
+ # Build LLM prompt
467
+ context_str = format_context_for_llm(hits)
468
+ system = (
469
+ f'You are a document assistant helping the user understand "{state.doc_name}". '
470
+ f'Answer based on the provided passages. Be specific and cite the text when useful. '
471
+ f'If the answer is not in the passages, say so clearly. Keep answers concise.'
472
+ )
473
+ user_with_context = (
474
+ f"Question: {msg}\n\n"
475
+ f"Relevant passages from the document:\n\n{context_str}"
476
  )
477
 
478
+ messages = format_messages(system, state.llm_history[-MAX_HISTORY:], user_with_context)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479
 
480
+ # Stream response
481
+ client = get_client(hf_token)
482
+ partial = ""
483
+ chat_history = chat_history + [(msg, "")]
484
+ for token in stream_response(client, model, messages):
485
+ partial += token
486
+ chat_history[-1] = (msg, partial)
487
+ yield chat_history, retrieval_display
 
488
 
489
+ # Save to history
490
+ state.llm_history.append((f"Question: {msg}", partial))
491
+ state.chat_history = chat_history
 
 
 
 
 
492
 
493
+ # ── Wire events ────────────────────────────────────────────────────────
 
 
494
 
495
+ upload_btn.click(
496
+ load_file,
497
+ inputs=[file_input, threshold_slider],
498
+ outputs=[doc_status, chatbot],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
499
  )
 
 
500
 
501
+ paste_btn.click(
502
+ load_paste,
503
+ inputs=[text_input, paste_name, threshold_slider],
504
+ outputs=[doc_status, chatbot],
505
+ )
 
 
 
 
 
 
506
 
507
+ demo_btn.click(
508
+ load_demo_cb,
509
+ inputs=[threshold_slider],
510
+ outputs=[doc_status, chatbot],
511
+ )
512
 
513
+ clear_btn.click(clear_chat, outputs=[chatbot, retrieval_info])
514
 
515
+ send_btn.click(
516
+ respond,
517
+ inputs=[msg_input, chatbot, threshold_slider, model_dropdown,
518
+ hf_token_input, show_retrieval_chk],
519
+ outputs=[chatbot, retrieval_info],
520
+ ).then(lambda: "", outputs=[msg_input])
521
 
522
+ msg_input.submit(
523
+ respond,
524
+ inputs=[msg_input, chatbot, threshold_slider, model_dropdown,
525
+ hf_token_input, show_retrieval_chk],
526
+ outputs=[chatbot, retrieval_info],
527
+ ).then(lambda: "", outputs=[msg_input])
528
 
529
+ # Load demo on startup
530
+ demo.load(_load_demo, outputs=[doc_status])
 
 
 
531
 
532
+ return demo, css, theme
533
 
534
 
535
  if __name__ == "__main__":
536
+ # Gradio 6.0 change: Pass css and theme into launch()
537
+ app, app_css, app_theme = create_app()
538
+ app.launch(share=False, css=app_css, theme=app_theme)