File size: 8,692 Bytes
98278f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
import os
from pathlib import Path
import gradio as gr

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding


# ======================
# Config (safe defaults)
# ======================
# Chat and embedding model names; each can be overridden via an environment variable.
MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
EMBED_MODEL = os.environ.get("OPENAI_EMBED_MODEL", "text-embedding-3-small")

# Number of chunks retrieved per question (passed as similarity_top_k).
TOP_K = int(os.environ.get("TOP_K", "3"))

# Knowledge base file in the Space repo.
DOC_PATH = Path(os.environ.get("DOC_PATH", "challenge_context.txt"))

# DDS logo image (GitHub link requested with ?raw=true).
_DEFAULT_LOGO_URL = (
    "https://github.com/Decoding-Data-Science/airesidency/blob/main/dds_logo.jpg?raw=true"
)
LOGO_URL = os.environ.get("LOGO_URL", _DEFAULT_LOGO_URL)

# Grounding instructions that are placed in front of every user question.
SYSTEM_GUARDRAILS = (
    "You are Challenge Copilot. "
    "Answer ONLY using the provided context. "
    "If the answer is not in the context, say: "
    "'I don’t know based on the current document.' "
    "Then ask the user to add the missing official details "
    "to challenge_context.txt."
)

# Text shown in the page header.
APP_TITLE = "Challenge Copilot — RAG Q&A Bot"
APP_SUBTITLE = (
    "A simple Retrieval-Augmented Generation (RAG) chatbot "
    "that answers questions about the Building AI Application Challenge "
    "using challenge_context.txt (LlamaIndex + OpenAI)."
)


# ======================
# Build index (cached)
# ======================
# Module-level cache so the index is built at most once per process.
_INDEX = None
_QUERY_ENGINE = None


def build_index():
    """Build (once) and return the query engine over the knowledge-base file.

    The first successful call configures the global LlamaIndex ``Settings``,
    loads ``DOC_PATH``, builds a ``VectorStoreIndex`` from it and stores both
    the index and its query engine in module-level globals. Later calls return
    the cached query engine without doing any work.

    Returns:
        The query engine created with ``similarity_top_k=TOP_K``.

    Raises:
        RuntimeError: If the ``OPENAI_API_KEY`` environment variable is unset
            or empty.
        FileNotFoundError: If no document could be loaded from ``DOC_PATH``.
    """
    global _INDEX, _QUERY_ENGINE
    if _QUERY_ENGINE is not None:
        return _QUERY_ENGINE

    if not os.getenv("OPENAI_API_KEY"):
        raise RuntimeError(
            "OPENAI_API_KEY is missing. Add it in the Space Settings → Variables and secrets."
        )

    if not DOC_PATH.exists():
        # Seed a placeholder so the app can start; make sure the target
        # directory exists first, otherwise write_text() itself would fail.
        DOC_PATH.parent.mkdir(parents=True, exist_ok=True)
        DOC_PATH.write_text(
            "Add the official Building AI Application Challenge content here.\n",
            encoding="utf-8",
        )

    # LlamaIndex global settings
    Settings.llm = OpenAI(model=MODEL, temperature=0.2)
    Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL)
    Settings.chunk_size = 800
    Settings.chunk_overlap = 120

    # Load exactly the target file. Scanning the whole parent directory would
    # also read and parse every unrelated .txt file just to discard it.
    docs = SimpleDirectoryReader(input_files=[str(DOC_PATH)]).load_data()
    if not docs:
        raise FileNotFoundError(f"Could not load {DOC_PATH.name}. Make sure it exists in the repo.")

    _INDEX = VectorStoreIndex.from_documents(docs)
    _QUERY_ENGINE = _INDEX.as_query_engine(similarity_top_k=TOP_K)
    return _QUERY_ENGINE


def format_sources(resp, max_sources=3, max_chars=240):
    """Render the source chunks attached to a query response as a numbered list.

    Args:
        resp: Response object. Its ``source_nodes`` attribute is read when
            present; a missing attribute or a ``None`` value means "no sources".
            Each entry must expose ``node.metadata`` (a mapping) and
            ``node.get_content()``, and may expose a numeric ``score``.
        max_sources: Maximum number of sources to list.
        max_chars: Maximum number of snippet characters shown per source.

    Returns:
        One line per source in the form ``"N. file (score=0.123): snippet"``,
        joined with newlines, or ``"No sources returned."`` when there are none.
        ``"..."`` is appended to a snippet only when it was actually shortened.
    """
    # `source_nodes` can be absent or explicitly None; both mean an empty list.
    source_nodes = getattr(resp, "source_nodes", None) or []

    lines = []
    for i, sn in enumerate(source_nodes[:max_sources], start=1):
        fn = sn.node.metadata.get("file_name", "unknown")

        # Flatten to a single line before cutting so the list stays compact.
        text = sn.node.get_content().replace("\n", " ").strip()
        snippet = text[:max_chars]
        if len(text) > max_chars:
            snippet += "..."

        score = getattr(sn, "score", None)
        score_txt = f" (score={score:.3f})" if isinstance(score, (float, int)) else ""
        lines.append(f"{i}. {fn}{score_txt}: {snippet}")
    return "\n".join(lines) if lines else "No sources returned."


def chat(message, history):
    """Answer one chat message using the cached query engine.

    Args:
        message: The user's question. Blank or missing input is answered
            immediately without touching the index or the LLM.
        history: Previous conversation turns. Accepted to match the callback
            signature used by ``gr.ChatInterface``; not read here.

    Returns:
        The answer text. Unless the ``SHOW_SOURCES`` environment variable is
        set to something other than ``true`` (case-insensitive), a "Sources"
        section produced by ``format_sources`` is appended.

    Raises:
        Whatever ``build_index`` raises (e.g. missing API key) propagates.
    """
    question = message.strip() if isinstance(message, str) else message
    if not question:
        # Nothing to answer: avoid building the index and paying for a query.
        return "Please type a question about the challenge."

    qe = build_index()

    # NOTE(review): the guardrail text is part of the string handed to
    # query(); presumably it therefore also influences retrieval. Consider
    # moving it into a system prompt / QA template — confirm before changing.
    prompt = (
        f"{SYSTEM_GUARDRAILS}\n\n"
        f"User question: {question}\n"
        "Answer using ONLY the context."
    )

    resp = qe.query(prompt)
    answer = str(resp).strip()

    # Tolerate stray whitespace around the flag value (e.g. "true ").
    show_sources = os.getenv("SHOW_SOURCES", "true").strip().lower() == "true"
    if show_sources:
        answer += "\n\n---\n**Sources:**\n" + format_sources(resp, max_sources=TOP_K)

    return answer


# ======================
# UI (professional layout)
# ======================
# Custom stylesheet handed to gr.Blocks; class names match the HTML snippets in the layout.
CSS = """
/* Layout polish */
.dds-header { display:flex; align-items:center; gap:16px; }
.dds-logo img { height:60px; width:auto; border-radius:10px; box-shadow: 0 2px 10px rgba(0,0,0,0.10); }
.dds-title { margin:0; line-height:1.1; }
.dds-subtitle { margin:6px 0 0 0; color: #555; }
.dds-card { border: 1px solid rgba(0,0,0,0.08); border-radius: 14px; padding: 14px; background: rgba(255,255,255,0.7); }
.dds-section-title { margin: 0 0 6px 0; }
.dds-muted { color: #666; font-size: 0.95rem; }
"""


def _soft_theme_or_none():
    """Return ``gr.themes.Soft()``, or ``None`` if constructing it fails for any reason."""
    try:
        return gr.themes.Soft()
    except Exception:
        return None


# Theme fallback (the theme goes to gr.Blocks, not to ChatInterface itself)
theme_obj = _soft_theme_or_none()

# Page definition: header, chat section, FAQ section and admin notes are all
# created inside a single gr.Blocks context that is bound to `demo`.
with gr.Blocks(theme=theme_obj, css=CSS, title=APP_TITLE) as demo:
    # Header row (Logo left + Title right)
    # NOTE(review): LOGO_URL / APP_TITLE / APP_SUBTITLE are interpolated into
    # the HTML below without escaping; fine while they come from trusted
    # config/env values — confirm.
    with gr.Row():
        with gr.Column(scale=1, min_width=140):
            # Use HTML for reliable remote image rendering
            gr.HTML(
                f"""
                <div class="dds-logo">
                    <img src="{LOGO_URL}" alt="DDS Logo"/>
                </div>
                """
            )
        with gr.Column(scale=6):
            gr.HTML(
                f"""
                <div class="dds-header">
                    <div>
                        <h2 class="dds-title">{APP_TITLE}</h2>
                        <p class="dds-subtitle">{APP_SUBTITLE}</p>
                        <p class="dds-muted">
                            Tip: If an answer is missing, add more official details to <b>challenge_context.txt</b> and restart the Space.
                        </p>
                    </div>
                </div>
                """
            )

    gr.Markdown("---")

    # Two professional sections
    with gr.Row():
        # Section 1: Chat
        with gr.Column(scale=6):
            gr.HTML(
                """
                <div class="dds-card">
                    <h3 class="dds-section-title">Section 1 — Ask the Copilot</h3>
                    <p class="dds-muted">RAG flow: retrieve relevant chunks → generate a grounded answer using your LLM API.</p>
                </div>
                """
            )

            # ChatInterface (NO theme kwarg here)
            # `chat` is the callback; `examples` lists sample questions.
            gr.ChatInterface(
                fn=chat,
                examples=[
                    "What will I build in this live session?",
                    "Who is this best for?",
                    "What are the prerequisites?",
                    "What is the RAG flow in this project?"
                ],
            )

        # Section 2: FAQ
        with gr.Column(scale=4):
            gr.HTML(
                """
                <div class="dds-card">
                    <h3 class="dds-section-title">Section 2 — FAQ</h3>
                    <p class="dds-muted">Common issues + quick fixes for deployment and content quality.</p>
                </div>
                """
            )

            # Each FAQ entry is an Accordion created with open=False. The
            # Markdown literals start at column 0 and are passed through
            # .strip() to drop the surrounding blank lines/indentation.
            with gr.Accordion("FAQ 1 — The bot says “I don’t know”", open=False):
                gr.Markdown(
                    """
- This means the answer is **not present** in `challenge_context.txt`.
- Add the missing official content (rules, checkpoints, prizes, submission format, dates).
- Commit the updated TXT and **restart** the Space.
                    """.strip()
                )

            with gr.Accordion("FAQ 2 — OPENAI_API_KEY missing", open=False):
                gr.Markdown(
                    """
- Go to your Space → **Settings → Variables and secrets**
- Add: `OPENAI_API_KEY`
- Save (Space restarts automatically).
                    """.strip()
                )

            with gr.Accordion("FAQ 3 — Sources are not showing", open=False):
                gr.Markdown(
                    """
- Ensure `SHOW_SOURCES=true` in Space variables (or leave it unset; default is true).
- Increase `TOP_K` if you want more retrieved chunks.
                    """.strip()
                )

            with gr.Accordion("FAQ 4 — Improve answer quality", open=False):
                gr.Markdown(
                    """
- Add more structured content into your TXT (headings + bullet points).
- Keep each checkpoint/rule as a clear section.
- Increase `TOP_K` slightly (e.g., 4–6) if context is larger.
                    """.strip()
                )

            with gr.Accordion("FAQ 5 — App fails on startup", open=False):
                gr.Markdown(
                    """
- Check Space logs.
- Most common causes:
  - Missing `challenge_context.txt` in repo
  - Missing `OPENAI_API_KEY`
  - Dependency mismatch (simplify `requirements.txt`)
                    """.strip()
                )

    gr.Markdown("---")
    # Footer: quick reference of the file and environment variables this app reads.
    gr.Markdown(
        """
**Admin notes**
- Context file: `challenge_context.txt`
- Model env vars: `OPENAI_MODEL`, `OPENAI_EMBED_MODEL`
- Retrieval env vars: `TOP_K`
- Sources toggle: `SHOW_SOURCES=true|false`
        """.strip()
    )

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()