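"""Challenge Copilot: a Gradio + LlamaIndex RAG app for Hugging Face Spaces.

Indexes a single knowledge-base file (challenge_context.txt by default) and
answers questions grounded only in that file via the OpenAI API.
"""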
import os
from pathlib import Path
import gradio as gr

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
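
# Assumed dependencies (package names can vary across LlamaIndex versions):
#   pip install gradio llama-index llama-index-llms-openai llama-index-embeddings-openai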


# ======================
# Config (safe defaults)
# ======================
MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
EMBED_MODEL = os.getenv("OPENAI_EMBED_MODEL", "text-embedding-3-small")
TOP_K = int(os.getenv("TOP_K", "3"))

# Knowledge base file in the Space repo root (recommended)
DOC_PATH = Path(os.getenv("DOC_PATH", "challenge_context.txt"))

# DDS logo (raw GitHub URL)
LOGO_URL = os.getenv(
    "LOGO_URL",
    "https://github.com/Decoding-Data-Science/airesidency/blob/main/dds_logo.jpg?raw=true",
)

# Behavior / guardrails
SYSTEM_GUARDRAILS = (
    "You are Challenge Copilot. Answer ONLY using the provided context from challenge_context.txt. "
    "If the answer is not in the context, say: 'I don’t know based on the current document.' "
    "Then ask the user to add the missing official details to challenge_context.txt."
)

APP_TITLE = "Building AI Application Challenge Copilot"
APP_SUBTITLE = (
    "Ask questions about the Building AI Application Challenge. Answers are grounded in the "
    "official content you place in challenge_context.txt (LlamaIndex + OpenAI)."
)


# ======================
# Index build (cached)
# ======================
_INDEX = None
_QUERY_ENGINE = None
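# Note: this module-level cache is not lock-protected; concurrent first requests
# may each build the index. Redundant but harmless (the later build wins).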


def build_index():
    """Build and cache the LlamaIndex query engine."""
    global _INDEX, _QUERY_ENGINE
    if _QUERY_ENGINE is not None:
        return _QUERY_ENGINE

    # OpenAI key must exist in Space secrets
    if not os.getenv("OPENAI_API_KEY"):
        raise RuntimeError(
            "OPENAI_API_KEY is missing. Add it in the Space → Settings → Variables and secrets."
        )

    # Create placeholder TXT if missing so Space can boot
    if not DOC_PATH.exists():
        DOC_PATH.write_text(
            "Add the official Building AI Application Challenge content here.\n",
            encoding="utf-8",
        )

    # LlamaIndex global settings
    Settings.llm = OpenAI(model=MODEL, temperature=0.2)
    Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL)
    Settings.chunk_size = 800
    Settings.chunk_overlap = 120
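    # Note: chunk_size / chunk_overlap are token counts; 800/120 are sensible
    # defaults for a short single-file knowledge base.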

    # SimpleDirectoryReader scans a directory, so point it at the file's parent
    # and filter down to the target file after loading
    data_dir = str(DOC_PATH.parent)
    docs = SimpleDirectoryReader(
        input_dir=data_dir,
        required_exts=[".txt"],
        recursive=False,
    ).load_data()

    # Only index the target file
    docs = [d for d in docs if d.metadata.get("file_name") == DOC_PATH.name]
    if not docs:
        raise FileNotFoundError(
            f"Could not load {DOC_PATH.name}. Make sure it exists in the repo root (or set DOC_PATH env var)."
        )

    _INDEX = VectorStoreIndex.from_documents(docs)
    _QUERY_ENGINE = _INDEX.as_query_engine(similarity_top_k=TOP_K)
    return _QUERY_ENGINE


def format_sources(resp, max_sources=3, max_chars=240):
    """Format top retrieved chunks for transparency."""
    lines = []
    for i, sn in enumerate(getattr(resp, "source_nodes", [])[:max_sources], start=1):
        fn = sn.node.metadata.get("file_name", "unknown")
        content = sn.node.get_content().replace("\n", " ").strip()
        # Append an ellipsis only when the snippet was actually truncated
        snippet = content[:max_chars] + ("..." if len(content) > max_chars else "")
        score = getattr(sn, "score", None)
        score_txt = f" (score={score:.3f})" if isinstance(score, (float, int)) else ""
        lines.append(f"{i}. {fn}{score_txt}: {snippet}")
    return "\n".join(lines) if lines else "No sources returned."
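
# Illustrative "Sources" output (values are hypothetical):
#   1. challenge_context.txt (score=0.812): Add the official Building AI ...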


def chat(message, history):
    """Chat handler for gr.ChatInterface (Gradio passes `history`, unused here)."""
    qe = build_index()

    prompt = (
        f"{SYSTEM_GUARDRAILS}\n\n"
        f"User question: {message}\n"
        f"Answer using ONLY the context."
    )

    resp = qe.query(prompt)
    answer = str(resp).strip()

    show_sources = os.getenv("SHOW_SOURCES", "true").lower() == "true"
    if show_sources:
        answer += "\n\n---\n**Sources:**\n" + format_sources(resp, max_sources=TOP_K)

    return answer
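
# Quick sanity check without the UI (assumes this file is saved as app.py, the
# usual Spaces entry point, and that OPENAI_API_KEY is set):
#   python -c "from app import chat; print(chat('What are the prerequisites?', []))"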


# ======================
# UI (professional layout)
# ======================
CSS = """
/* Global polish */
.dds-header { display:flex; align-items:center; gap:16px; }
.dds-logo img { height:60px; width:auto; border-radius:10px; box-shadow: 0 2px 10px rgba(0,0,0,0.10); }
.dds-title { margin:0; line-height:1.1; }
.dds-subtitle { margin:6px 0 0 0; color: #555; }
.dds-muted { color: #666; font-size: 0.95rem; }
.dds-card { border: 1px solid rgba(0,0,0,0.08); border-radius: 14px; padding: 14px; background: rgba(255,255,255,0.7); }
.dds-section-title { margin: 0 0 6px 0; }
"""

# Theme fallback (don’t pass theme to ChatInterface to avoid older-gradio errors)
try:
    theme_obj = gr.themes.Soft()
except Exception:
    theme_obj = None

with gr.Blocks(theme=theme_obj, css=CSS, title=APP_TITLE) as demo:
    # Header row (Logo left + Title right)
    with gr.Row():
        with gr.Column(scale=1, min_width=140):
            gr.HTML(
                f"""
                <div class="dds-logo">
                    <img src="{LOGO_URL}" alt="DDS Logo"/>
                </div>
                """
            )
        with gr.Column(scale=6):
            gr.HTML(
                f"""
                <div class="dds-header">
                    <div>
                        <h2 class="dds-title">{APP_TITLE}</h2>
                        <p class="dds-subtitle">{APP_SUBTITLE}</p>
                        <p class="dds-muted">
                            If something is missing, add official details to <b>{DOC_PATH.name}</b> and restart the Space.
                        </p>
                    </div>
                </div>
                """
            )

    gr.Markdown("---")

    # Two sections: Chat + Challenge FAQ
    with gr.Row():
        # Section 1: Chat
        with gr.Column(scale=6):
            gr.HTML(
                """
                <div class="dds-card">
                    <h3 class="dds-section-title">Section 1 — Ask the Copilot</h3>
                    <p class="dds-muted">RAG flow: retrieve relevant chunks → generate a grounded answer using your LLM API.</p>
                </div>
                """
            )

            # IMPORTANT: no theme= here; some Gradio versions reject theme on ChatInterface
            gr.ChatInterface(
                fn=chat,
                examples=[
                    "What will I build in this live session?",
                    "Who is this best for?",
                    "What are the prerequisites?",
                    "What is the RAG flow in this project?",
                    "What should I submit (link + repo + write-up)?",
                ],
            )

        # Section 2: Challenge FAQ (participant-focused)
        with gr.Column(scale=4):
            gr.HTML(
                """
                <div class="dds-card">
                    <h3 class="dds-section-title">Section 2 — Challenge FAQ</h3>
                    <p class="dds-muted">
                        Quick guidance for participants. If something is not answered here, ask in the Copilot chat.
                    </p>
                </div>
                """
            )

            with gr.Accordion("FAQ 1 — What should I build for this challenge?", open=False):
                gr.Markdown(
                    """
- Build a simple AI application aligned to the challenge tracks (LLM/API, no-code/low-code, sponsor tool track, etc.).
- Aim for a **working demo** + **proof-of-work** you can share.
- Ask in chat: *“Suggest 5 project ideas that fit the official rules in the document.”*
                    """.strip()
                )

            with gr.Accordion("FAQ 2 — Which track/path should I choose?", open=False):
                gr.Markdown(
                    """
- Pick based on your level:
  - **LLM/API Integration:** Python + API + simple RAG patterns
  - **No-code/Low-code:** fastest to ship, less code
  - **Sponsor/tool track:** follow the workshop tool (if applicable)
- Ask in chat: *“Given my background (X), which track is best and why?”*
                    """.strip()
                )

            with gr.Accordion("FAQ 3 — What is the minimum deliverable to be eligible?", open=False):
                gr.Markdown(
                    """
Typical minimum:
- A working **app link** that judges can open
- A short description (problem + user + how to use)
- Repo is optional but strongly recommended

Ask in chat: *“What does the official document say about minimum submission requirements?”*
                    """.strip()
                )

            with gr.Accordion("FAQ 4 — How do I submit my project?", open=False):
                gr.Markdown(
                    """
Common submission package:
- App URL (Hugging Face Spaces / Streamlit / etc.)
- Repo URL (optional but strong)
- Short write-up + screenshots/video (if required)

Ask in chat: *“What is the official submission format and where is the submission link?”*
                    """.strip()
                )

            with gr.Accordion("FAQ 5 — Where should I deploy so judges can access easily?", open=False):
                gr.Markdown(
                    """
Low-friction options:
- **Hugging Face Spaces (Gradio)** — easiest for demos
- **Streamlit Community Cloud**
- **Vercel** (for web apps)

Ask in chat: *“What deployment options are recommended in the official challenge doc?”*
                    """.strip()
                )

            with gr.Accordion("FAQ 6 — What do judges usually look for?", open=False):
                gr.Markdown(
                    """
Strong signals:
- Working demo (no errors, easy to use)
- Clear problem + target audience
- Good AI behavior (grounded, safe, consistent)
- Product thinking (UX, clarity, flow)

Ask in chat: *“What are the judging criteria in the official document?”*
                    """.strip()
                )

            with gr.Accordion("FAQ 7 — What should I post as proof-of-work?", open=False):
                gr.Markdown(
                    """
Suggested proof post structure:
- 1-line problem + who it helps
- Demo link + screenshot/GIF
- What you learned + next improvement

Ask in chat: *“Draft a proof-of-work post based on my project idea.”*
                    """.strip()
                )

            with gr.Accordion("FAQ 8 — How do I make my app ‘RAG grounded’ (not hallucinating)?", open=False):
                gr.Markdown(
                    """
Best practices:
- Restrict answers to retrieved context
- Show sources/snippets (optional but strong)
- If missing info → say “Not in document” and request adding content

Ask in chat: *“Answer using only the document; if missing, tell me what section to add.”*
                    """.strip()
                )

            with gr.Accordion("FAQ 9 — I can’t find a detail (dates/rules/prizes). What now?", open=False):
                gr.Markdown(
                    f"""
- The Copilot can only answer what exists inside **{DOC_PATH.name}**.
- If the official detail isn’t in the TXT, add it, commit, and restart the Space.

Ask in chat: *“What exact section should I add to cover [missing detail]?”*
                    """.strip()
                )

    gr.Markdown("---")
    gr.Markdown(
        f"""
**Admin notes**
- Context file: `{DOC_PATH.name}`
- Optional env vars: `OPENAI_MODEL`, `OPENAI_EMBED_MODEL`, `TOP_K`, `SHOW_SOURCES`, `DOC_PATH`, `LOGO_URL`
        """.strip()
    )
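
# Assumed local usage: export OPENAI_API_KEY, then run `python app.py`.
# On Spaces, launch() defaults are fine; locally you can pass e.g. server_port=7860.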

if __name__ == "__main__":
    demo.launch()