decodingdatascience committed on
Commit
98278f3
·
verified ·
1 Parent(s): 336d701

Create app2.py

Browse files
Files changed (1) hide show
  1. app2.py +257 -0
app2.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ import gradio as gr
4
+
5
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
6
+ from llama_index.llms.openai import OpenAI
7
+ from llama_index.embeddings.openai import OpenAIEmbedding
8
+
9
+
10
# ======================
# Config (safe defaults)
# ======================

def _env_int(name: str, default: int) -> int:
    """Read an integer from the environment; fall back to *default* on a bad value.

    The previous ``int(os.getenv(...))`` pattern crashed the whole app at import
    time if the variable was set to a non-numeric string in the Space settings.
    """
    try:
        return int(os.getenv(name, str(default)))
    except ValueError:
        return default


# Chat-completion and embedding model names (overridable via Space variables).
MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
EMBED_MODEL = os.getenv("OPENAI_EMBED_MODEL", "text-embedding-3-small")
# Number of chunks retrieved per query.
TOP_K = _env_int("TOP_K", 3)

# Your knowledge base file in the Space repo
DOC_PATH = Path(os.getenv("DOC_PATH", "challenge_context.txt"))

# DDS logo (raw GitHub URL)
LOGO_URL = os.getenv(
    "LOGO_URL",
    "https://github.com/Decoding-Data-Science/airesidency/blob/main/dds_logo.jpg?raw=true",
)

# Guardrail prompt prepended to every query so answers stay grounded in the doc.
SYSTEM_GUARDRAILS = (
    "You are Challenge Copilot. Answer ONLY using the provided context. "
    "If the answer is not in the context, say: 'I don’t know based on the current document.' "
    "Then ask the user to add the missing official details to challenge_context.txt."
)

APP_TITLE = "Challenge Copilot — RAG Q&A Bot"
APP_SUBTITLE = (
    "A simple Retrieval-Augmented Generation (RAG) chatbot that answers questions about the "
    "Building AI Application Challenge using challenge_context.txt (LlamaIndex + OpenAI)."
)
37
+
38
+
39
# ======================
# Build index (cached)
# ======================
# Module-level cache: the index is built once per process and reused.
_INDEX = None
_QUERY_ENGINE = None


def build_index():
    """Build (or return the cached) LlamaIndex query engine over DOC_PATH.

    Returns:
        A query engine configured with ``similarity_top_k=TOP_K``.

    Raises:
        RuntimeError: if OPENAI_API_KEY is not set.
        FileNotFoundError: if the knowledge-base document cannot be loaded.
    """
    global _INDEX, _QUERY_ENGINE
    if _QUERY_ENGINE is not None:
        return _QUERY_ENGINE

    if not os.getenv("OPENAI_API_KEY"):
        raise RuntimeError(
            "OPENAI_API_KEY is missing. Add it in the Space Settings → Variables and secrets."
        )

    # Seed a placeholder document so the Space still boots without content.
    if not DOC_PATH.exists():
        DOC_PATH.write_text(
            "Add the official Building AI Application Challenge content here.\n",
            encoding="utf-8",
        )

    # LlamaIndex global settings (LLM, embeddings, chunking).
    Settings.llm = OpenAI(model=MODEL, temperature=0.2)
    Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL)
    Settings.chunk_size = 800
    Settings.chunk_overlap = 120

    # Load only the target file via input_files, instead of reading every
    # .txt in the directory and discarding the non-matching documents.
    docs = SimpleDirectoryReader(input_files=[str(DOC_PATH)]).load_data()
    if not docs:
        raise FileNotFoundError(
            f"Could not load {DOC_PATH.name}. Make sure it exists in the repo."
        )

    _INDEX = VectorStoreIndex.from_documents(docs)
    _QUERY_ENGINE = _INDEX.as_query_engine(similarity_top_k=TOP_K)
    return _QUERY_ENGINE
83
+
84
+
85
def format_sources(resp, max_sources=3, max_chars=240):
    """Render up to *max_sources* retrieved nodes as a numbered text list.

    Each entry shows the source file name, an optional similarity score
    (3 decimal places), and a snippet truncated to *max_chars* characters.
    Returns the literal string "No sources returned." when the response
    carries no source nodes.
    """
    nodes = getattr(resp, "source_nodes", [])
    entries = []
    for idx, node_with_score in enumerate(nodes[:max_sources], start=1):
        file_name = node_with_score.node.metadata.get("file_name", "unknown")
        content = node_with_score.node.get_content()
        snippet = content.replace("\n", " ").strip()[:max_chars]
        score = getattr(node_with_score, "score", None)
        if isinstance(score, (float, int)):
            score_txt = f" (score={score:.3f})"
        else:
            score_txt = ""
        entries.append(f"{idx}. {file_name}{score_txt}: {snippet}...")
    if not entries:
        return "No sources returned."
    return "\n".join(entries)
94
+
95
+
96
def chat(message, history):
    """Gradio chat callback: answer *message* grounded in the indexed document.

    *history* is supplied by gr.ChatInterface but is not used; the grounding
    context comes from the vector index, not the conversation transcript.
    """
    engine = build_index()

    prompt = (
        f"{SYSTEM_GUARDRAILS}\n\n"
        f"User question: {message}\n"
        f"Answer using ONLY the context."
    )

    response = engine.query(prompt)
    answer = str(response).strip()

    # Sources are appended unless SHOW_SOURCES is explicitly set to a
    # value other than "true" (case-insensitive); default is on.
    if os.getenv("SHOW_SOURCES", "true").lower() == "true":
        sources = format_sources(response, max_sources=TOP_K)
        answer = answer + "\n\n---\n**Sources:**\n" + sources

    return answer
113
+
114
+
115
# ======================
# UI (professional layout)
# ======================
# Custom CSS classes referenced by the gr.HTML snippets below.
CSS = """
/* Layout polish */
.dds-header { display:flex; align-items:center; gap:16px; }
.dds-logo img { height:60px; width:auto; border-radius:10px; box-shadow: 0 2px 10px rgba(0,0,0,0.10); }
.dds-title { margin:0; line-height:1.1; }
.dds-subtitle { margin:6px 0 0 0; color: #555; }
.dds-card { border: 1px solid rgba(0,0,0,0.08); border-radius: 14px; padding: 14px; background: rgba(255,255,255,0.7); }
.dds-section-title { margin: 0 0 6px 0; }
.dds-muted { color: #666; font-size: 0.95rem; }
"""

# Theme fallback (no theme passed to ChatInterface itself)
# NOTE(review): gr.themes.Soft() may be missing on older Gradio versions,
# hence the broad except; the app then falls back to the default theme.
try:
    theme_obj = gr.themes.Soft()
except Exception:
    theme_obj = None

with gr.Blocks(theme=theme_obj, css=CSS, title=APP_TITLE) as demo:
    # Header row (Logo left + Title right)
    with gr.Row():
        with gr.Column(scale=1, min_width=140):
            # Use HTML for reliable remote image rendering
            gr.HTML(
                f"""
                <div class="dds-logo">
                    <img src="{LOGO_URL}" alt="DDS Logo"/>
                </div>
                """
            )
        with gr.Column(scale=6):
            gr.HTML(
                f"""
                <div class="dds-header">
                    <div>
                        <h2 class="dds-title">{APP_TITLE}</h2>
                        <p class="dds-subtitle">{APP_SUBTITLE}</p>
                        <p class="dds-muted">
                            Tip: If an answer is missing, add more official details to <b>challenge_context.txt</b> and restart the Space.
                        </p>
                    </div>
                </div>
                """
            )

    gr.Markdown("---")

    # Two professional sections
    with gr.Row():
        # Section 1: Chat
        with gr.Column(scale=6):
            gr.HTML(
                """
                <div class="dds-card">
                    <h3 class="dds-section-title">Section 1 — Ask the Copilot</h3>
                    <p class="dds-muted">RAG flow: retrieve relevant chunks → generate a grounded answer using your LLM API.</p>
                </div>
                """
            )

            # ChatInterface (NO theme kwarg here)
            # The chat() callback defined above does the retrieval + generation.
            gr.ChatInterface(
                fn=chat,
                examples=[
                    "What will I build in this live session?",
                    "Who is this best for?",
                    "What are the prerequisites?",
                    "What is the RAG flow in this project?"
                ],
            )

        # Section 2: FAQ
        with gr.Column(scale=4):
            gr.HTML(
                """
                <div class="dds-card">
                    <h3 class="dds-section-title">Section 2 — FAQ</h3>
                    <p class="dds-muted">Common issues + quick fixes for deployment and content quality.</p>
                </div>
                """
            )

            with gr.Accordion("FAQ 1 — The bot says “I don’t know”", open=False):
                gr.Markdown(
                    """
                    - This means the answer is **not present** in `challenge_context.txt`.
                    - Add the missing official content (rules, checkpoints, prizes, submission format, dates).
                    - Commit the updated TXT and **restart** the Space.
                    """.strip()
                )

            with gr.Accordion("FAQ 2 — OPENAI_API_KEY missing", open=False):
                gr.Markdown(
                    """
                    - Go to your Space → **Settings → Variables and secrets**
                    - Add: `OPENAI_API_KEY`
                    - Save (Space restarts automatically).
                    """.strip()
                )

            with gr.Accordion("FAQ 3 — Sources are not showing", open=False):
                gr.Markdown(
                    """
                    - Ensure `SHOW_SOURCES=true` in Space variables (or leave it unset; default is true).
                    - Increase `TOP_K` if you want more retrieved chunks.
                    """.strip()
                )

            with gr.Accordion("FAQ 4 — Improve answer quality", open=False):
                gr.Markdown(
                    """
                    - Add more structured content into your TXT (headings + bullet points).
                    - Keep each checkpoint/rule as a clear section.
                    - Increase `TOP_K` slightly (e.g., 4–6) if context is larger.
                    """.strip()
                )

            with gr.Accordion("FAQ 5 — App fails on startup", open=False):
                gr.Markdown(
                    """
                    - Check Space logs.
                    - Most common causes:
                      - Missing `challenge_context.txt` in repo
                      - Missing `OPENAI_API_KEY`
                      - Dependency mismatch (simplify `requirements.txt`)
                    """.strip()
                )

    gr.Markdown("---")
    gr.Markdown(
        """
        **Admin notes**
        - Context file: `challenge_context.txt`
        - Model env vars: `OPENAI_MODEL`, `OPENAI_EMBED_MODEL`
        - Retrieval env vars: `TOP_K`
        - Sources toggle: `SHOW_SOURCES=true|false`
        """.strip()
    )

if __name__ == "__main__":
    demo.launch()