| | import os |
| | from pathlib import Path |
| | import gradio as gr |
| |
|
| | from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings |
| | from llama_index.llms.openai import OpenAI |
| | from llama_index.embeddings.openai import OpenAIEmbedding |
| |
|
| |
|
| | |
| | |
| | |
# ---------------------------------------------------------------------------
# Runtime configuration. Every value below can be overridden through an
# environment variable of the same name as the lookup key.
# ---------------------------------------------------------------------------
MODEL: str = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
EMBED_MODEL: str = os.environ.get("OPENAI_EMBED_MODEL", "text-embedding-3-small")
# Number of chunks retrieved per question (also caps the sources shown).
TOP_K: int = int(os.environ.get("TOP_K", "3"))

# The single text document the bot is grounded on.
DOC_PATH: Path = Path(os.environ.get("DOC_PATH", "challenge_context.txt"))

# Logo rendered in the page header.
LOGO_URL: str = os.environ.get(
    "LOGO_URL",
    "https://github.com/Decoding-Data-Science/airesidency/blob/main/dds_logo.jpg?raw=true",
)

# Instruction text that chat() prepends to every user question.
SYSTEM_GUARDRAILS: str = (
    "You are Challenge Copilot. Answer ONLY using the provided context. "
    "If the answer is not in the context, say: 'I don’t know based on the current document.' "
    "Then ask the user to add the missing official details to challenge_context.txt."
)

# Text displayed in the page header (and used as the browser tab title).
APP_TITLE: str = "Challenge Copilot — RAG Q&A Bot"
APP_SUBTITLE: str = (
    "A simple Retrieval-Augmented Generation (RAG) chatbot that answers questions about the "
    "Building AI Application Challenge using challenge_context.txt (LlamaIndex + OpenAI)."
)

# ---------------------------------------------------------------------------
# Process-wide cache filled by build_index() on its first successful call.
# ---------------------------------------------------------------------------
_INDEX = None
_QUERY_ENGINE = None
| |
|
def build_index():
    """Build the vector index once and return the cached query engine.

    The first successful call configures the LlamaIndex ``Settings`` (LLM,
    embedding model, chunking), loads the document at ``DOC_PATH``, builds a
    ``VectorStoreIndex`` from it and stores both the index and the query
    engine in module-level globals. Later calls return the cached engine.

    If ``DOC_PATH`` does not exist, a one-line placeholder file is written so
    the app can still start.

    Returns:
        The query engine created with ``similarity_top_k=TOP_K``.

    Raises:
        RuntimeError: If the ``OPENAI_API_KEY`` environment variable is unset
            or empty.
        FileNotFoundError: If no document could be loaded from ``DOC_PATH``.
    """
    global _INDEX, _QUERY_ENGINE
    if _QUERY_ENGINE is not None:
        return _QUERY_ENGINE

    if not os.getenv("OPENAI_API_KEY"):
        raise RuntimeError(
            "OPENAI_API_KEY is missing. Add it in the Space Settings → Variables and secrets."
        )

    if not DOC_PATH.exists():
        # DOC_PATH is configurable, so its directory may not exist yet.
        DOC_PATH.parent.mkdir(parents=True, exist_ok=True)
        DOC_PATH.write_text(
            "Add the official Building AI Application Challenge content here.\n",
            encoding="utf-8",
        )

    # Global LlamaIndex configuration used for indexing and answering.
    Settings.llm = OpenAI(model=MODEL, temperature=0.2)
    Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL)
    Settings.chunk_size = 800
    Settings.chunk_overlap = 120

    # Load exactly the configured file. Scanning the whole parent directory
    # for "*.txt" would also parse unrelated files (e.g. requirements.txt) and
    # would silently skip DOC_PATH whenever it has a different extension.
    docs = SimpleDirectoryReader(input_files=[str(DOC_PATH)]).load_data()
    if not docs:
        raise FileNotFoundError(f"Could not load {DOC_PATH.name}. Make sure it exists in the repo.")

    _INDEX = VectorStoreIndex.from_documents(docs)
    _QUERY_ENGINE = _INDEX.as_query_engine(similarity_top_k=TOP_K)
    return _QUERY_ENGINE
| |
|
| |
|
def format_sources(resp, max_sources=3, max_chars=240):
    """Render the retrieved source nodes of a response as a numbered list.

    Args:
        resp: Response object; its ``source_nodes`` attribute (if present and
            not ``None``) is read. Each entry must expose ``.node.metadata``
            (a mapping), ``.node.get_content()`` and optionally ``.score``.
        max_sources: Maximum number of sources to list.
        max_chars: Maximum number of snippet characters shown per source.

    Returns:
        One line per source in the form ``"<n>. <file>[ (score=x.xxx)]: <snippet>"``,
        joined with newlines, or ``"No sources returned."`` when there are none.
        A trailing ``"..."`` is added only when the snippet was actually cut.
    """
    # A missing attribute and an explicit None are both treated as "no sources".
    source_nodes = getattr(resp, "source_nodes", None) or []

    lines = []
    for i, sn in enumerate(source_nodes[:max_sources], start=1):
        fn = sn.node.metadata.get("file_name", "unknown")

        # Collapse newlines so every source stays on a single output line.
        text = sn.node.get_content().replace("\n", " ").strip()
        snippet = text[:max_chars]
        ellipsis = "..." if len(text) > max_chars else ""

        score = getattr(sn, "score", None)
        score_txt = f" (score={score:.3f})" if isinstance(score, (float, int)) else ""

        lines.append(f"{i}. {fn}{score_txt}: {snippet}{ellipsis}")

    return "\n".join(lines) if lines else "No sources returned."
| |
|
| |
|
def chat(message, history):
    """Answer one chat message using the cached RAG query engine.

    Args:
        message: The user's question.
        history: Previous conversation turns supplied by the chat UI; unused.

    Returns:
        The answer text. Unless the ``SHOW_SOURCES`` environment variable is
        set to something other than ``true`` (case-insensitive), a
        "Sources" section produced by ``format_sources`` is appended.
        A blank message returns a short prompt without querying the engine.

    Raises:
        RuntimeError, FileNotFoundError: Propagated from ``build_index``.
    """
    # Do not spend an embedding + LLM call on an empty question.
    if not str(message or "").strip():
        return "Please enter a question."

    # Imported here so the block does not depend on a file-level import.
    from llama_index.core.schema import QueryBundle

    qe = build_index()

    prompt = (
        f"{SYSTEM_GUARDRAILS}\n\n"
        f"User question: {message}\n"
        f"Answer using ONLY the context."
    )

    # The LLM receives the full guarded prompt, but retrieval embeds only the
    # user's question; embedding the guardrail text as well would pull the
    # similarity search away from what the user actually asked.
    resp = qe.query(
        QueryBundle(query_str=prompt, custom_embedding_strs=[str(message)])
    )
    answer = str(resp).strip()

    show_sources = os.getenv("SHOW_SOURCES", "true").strip().lower() == "true"
    if show_sources:
        answer += "\n\n---\n**Sources:**\n" + format_sources(resp, max_sources=TOP_K)

    return answer
| |
|
| |
|
| | |
| | |
| | |
# Custom stylesheet handed to gr.Blocks; the dds-* classes are used by the
# HTML fragments of the page header and the section cards.
CSS = """
/* Layout polish */
.dds-header { display:flex; align-items:center; gap:16px; }
.dds-logo img { height:60px; width:auto; border-radius:10px; box-shadow: 0 2px 10px rgba(0,0,0,0.10); }
.dds-title { margin:0; line-height:1.1; }
.dds-subtitle { margin:6px 0 0 0; color: #555; }
.dds-card { border: 1px solid rgba(0,0,0,0.08); border-radius: 14px; padding: 14px; background: rgba(255,255,255,0.7); }
.dds-section-title { margin: 0 0 6px 0; }
.dds-muted { color: #666; font-size: 0.95rem; }
"""

# Best effort: use the Soft theme when it can be constructed, otherwise keep
# None so gr.Blocks is created without an explicit theme.
theme_obj = None
try:
    theme_obj = gr.themes.Soft()
except Exception:
    pass
| |
|
# NOTE(review): the original indentation of this layout was lost; the nesting
# below is reconstructed from the statement order. Confirm in particular that
# the trailing separator + "Admin notes" belong inside the right-hand column.

# --- Static HTML fragments -------------------------------------------------
_LOGO_HTML = f"""
<div class="dds-logo">
<img src="{LOGO_URL}" alt="DDS Logo"/>
</div>
"""

_TITLE_HTML = f"""
<div class="dds-header">
<div>
<h2 class="dds-title">{APP_TITLE}</h2>
<p class="dds-subtitle">{APP_SUBTITLE}</p>
<p class="dds-muted">
Tip: If an answer is missing, add more official details to <b>challenge_context.txt</b> and restart the Space.
</p>
</div>
</div>
"""

_CHAT_CARD_HTML = """
<div class="dds-card">
<h3 class="dds-section-title">Section 1 — Ask the Copilot</h3>
<p class="dds-muted">RAG flow: retrieve relevant chunks → generate a grounded answer using your LLM API.</p>
</div>
"""

_FAQ_CARD_HTML = """
<div class="dds-card">
<h3 class="dds-section-title">Section 2 — FAQ</h3>
<p class="dds-muted">Common issues + quick fixes for deployment and content quality.</p>
</div>
"""

# --- Chat examples and FAQ content -----------------------------------------
_EXAMPLE_QUESTIONS = [
    "What will I build in this live session?",
    "Who is this best for?",
    "What are the prerequisites?",
    "What is the RAG flow in this project?",
]

# (accordion title, markdown body) pairs, rendered in this order.
# NOTE(review): the three bullets after "Most common causes:" in FAQ 5 may
# have been indented as sub-items originally — confirm against the real file.
_FAQ_ENTRIES = (
    (
        "FAQ 1 — The bot says “I don’t know”",
        """
- This means the answer is **not present** in `challenge_context.txt`.
- Add the missing official content (rules, checkpoints, prizes, submission format, dates).
- Commit the updated TXT and **restart** the Space.
""",
    ),
    (
        "FAQ 2 — OPENAI_API_KEY missing",
        """
- Go to your Space → **Settings → Variables and secrets**
- Add: `OPENAI_API_KEY`
- Save (Space restarts automatically).
""",
    ),
    (
        "FAQ 3 — Sources are not showing",
        """
- Ensure `SHOW_SOURCES=true` in Space variables (or leave it unset; default is true).
- Increase `TOP_K` if you want more retrieved chunks.
""",
    ),
    (
        "FAQ 4 — Improve answer quality",
        """
- Add more structured content into your TXT (headings + bullet points).
- Keep each checkpoint/rule as a clear section.
- Increase `TOP_K` slightly (e.g., 4–6) if context is larger.
""",
    ),
    (
        "FAQ 5 — App fails on startup",
        """
- Check Space logs.
- Most common causes:
- Missing `challenge_context.txt` in repo
- Missing `OPENAI_API_KEY`
- Dependency mismatch (simplify `requirements.txt`)
""",
    ),
)

_ADMIN_NOTES_MD = """
**Admin notes**
- Context file: `challenge_context.txt`
- Model env vars: `OPENAI_MODEL`, `OPENAI_EMBED_MODEL`
- Retrieval env vars: `TOP_K`
- Sources toggle: `SHOW_SOURCES=true|false`
"""

# --- Page layout -----------------------------------------------------------
with gr.Blocks(theme=theme_obj, css=CSS, title=APP_TITLE) as demo:
    # Header row: logo on the left, title/subtitle/tip on the right.
    with gr.Row():
        with gr.Column(scale=1, min_width=140):
            gr.HTML(_LOGO_HTML)
        with gr.Column(scale=6):
            gr.HTML(_TITLE_HTML)

    gr.Markdown("---")

    # Main row: chat on the left, FAQ on the right.
    with gr.Row():
        with gr.Column(scale=6):
            gr.HTML(_CHAT_CARD_HTML)
            gr.ChatInterface(fn=chat, examples=_EXAMPLE_QUESTIONS)

        with gr.Column(scale=4):
            gr.HTML(_FAQ_CARD_HTML)

            # One collapsed accordion per FAQ entry.
            for faq_title, faq_body in _FAQ_ENTRIES:
                with gr.Accordion(faq_title, open=False):
                    gr.Markdown(faq_body.strip())

            gr.Markdown("---")
            gr.Markdown(_ADMIN_NOTES_MD.strip())


if __name__ == "__main__":
    demo.launch()
| |
|