# Source: Hugging Face Space "decodingdatascience/Challengebot" (status: Running)
# File size: 8,692 bytes
# Commit: 98278f3

import os
from pathlib import Path

import gradio as gr
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core.schema import QueryBundle
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI


# ======================
# Config (safe defaults)
# ======================
# Each value is read from the environment once, at import time; the second
# argument of every lookup is the fallback used when the variable is unset.
MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
EMBED_MODEL = os.environ.get("OPENAI_EMBED_MODEL", "text-embedding-3-small")
TOP_K = int(os.environ.get("TOP_K", "3"))  # number of chunks retrieved per question

# Knowledge-base file, resolved relative to the working directory of the Space.
DOC_PATH = Path(os.environ.get("DOC_PATH", "challenge_context.txt"))

# DDS logo shown in the page header (raw GitHub URL).
LOGO_URL = os.environ.get(
    "LOGO_URL",
    "https://github.com/Decoding-Data-Science/airesidency/blob/main/dds_logo.jpg?raw=true",
)

# Instruction text prepended to every question to keep answers grounded in the
# retrieved context.
SYSTEM_GUARDRAILS = " ".join(
    (
        "You are Challenge Copilot. Answer ONLY using the provided context.",
        "If the answer is not in the context, say: 'I don’t know based on the current document.'",
        "Then ask the user to add the missing official details to challenge_context.txt.",
    )
)

# Page title and the one-paragraph description rendered under it.
APP_TITLE = "Challenge Copilot — RAG Q&A Bot"
APP_SUBTITLE = (
    "A simple Retrieval-Augmented Generation (RAG) chatbot "
    "that answers questions about the Building AI Application Challenge "
    "using challenge_context.txt (LlamaIndex + OpenAI)."
)


# ======================
# Build index (cached)
# ======================
# Module-level cache: the index and query engine are built on the first call
# to build_index() and reused by every later call in this process.
_INDEX = None
_QUERY_ENGINE = None


def build_index():
    """Return the query engine for the challenge document, building it once.

    On the first call this validates the environment, makes sure the
    knowledge-base file exists (writing a one-line placeholder if it does
    not), configures the global LlamaIndex ``Settings``, embeds the document
    into a ``VectorStoreIndex`` and caches the resulting query engine in the
    module globals. Later calls return the cached engine unchanged.

    Returns:
        The cached query engine, configured with ``similarity_top_k=TOP_K``.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is not set in the environment.
        FileNotFoundError: If the reader returns no documents for ``DOC_PATH``.
    """
    global _INDEX, _QUERY_ENGINE
    if _QUERY_ENGINE is not None:
        return _QUERY_ENGINE

    if not os.getenv("OPENAI_API_KEY"):
        raise RuntimeError(
            "OPENAI_API_KEY is missing. Add it in the Space Settings → Variables and secrets."
        )

    if not DOC_PATH.exists():
        # Create the parent directory too, so a DOC_PATH override that points
        # into a not-yet-existing folder does not fail on write.
        DOC_PATH.parent.mkdir(parents=True, exist_ok=True)
        DOC_PATH.write_text(
            "Add the official Building AI Application Challenge content here.\n",
            encoding="utf-8",
        )

    # LlamaIndex global settings
    Settings.llm = OpenAI(model=MODEL, temperature=0.2)
    Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL)
    Settings.chunk_size = 800
    Settings.chunk_overlap = 120

    # Load exactly the target file. Scanning the whole directory would also
    # parse unrelated .txt files (e.g. requirements.txt) only to discard them,
    # and would silently skip a DOC_PATH whose extension is not ".txt".
    docs = SimpleDirectoryReader(input_files=[str(DOC_PATH)]).load_data()
    if not docs:
        raise FileNotFoundError(f"Could not load {DOC_PATH.name}. Make sure it exists in the repo.")

    _INDEX = VectorStoreIndex.from_documents(docs)
    _QUERY_ENGINE = _INDEX.as_query_engine(similarity_top_k=TOP_K)
    return _QUERY_ENGINE


def format_sources(resp, max_sources=3, max_chars=240):
    """Render the source nodes attached to a response as a numbered list.

    Args:
        resp: Response-like object. Its ``source_nodes`` attribute is read if
            present; a missing or ``None`` attribute is treated as "no
            sources". Each entry must expose ``node.metadata`` (a mapping),
            ``node.get_content()`` and, optionally, a numeric ``score``.
        max_sources: Maximum number of sources to list.
        max_chars: Maximum number of snippet characters shown per source.

    Returns:
        One line per source in the form
        ``"<n>. <file_name>[ (score=<x.xxx>)]: <snippet>"`` joined by newlines,
        or ``"No sources returned."`` when there is nothing to list. A trailing
        ``"..."`` is added only when the snippet was actually cut short.
    """
    # `or []` also covers responses whose source_nodes attribute is None.
    source_nodes = getattr(resp, "source_nodes", None) or []

    lines = []
    for i, sn in enumerate(source_nodes[:max_sources], start=1):
        fn = sn.node.metadata.get("file_name", "unknown")

        # Flatten newlines so each source stays on a single output line.
        text = sn.node.get_content().replace("\n", " ").strip()
        snippet = text[:max_chars]
        if len(text) > max_chars:
            snippet += "..."

        score = getattr(sn, "score", None)
        score_txt = f" (score={score:.3f})" if isinstance(score, (float, int)) else ""
        lines.append(f"{i}. {fn}{score_txt}: {snippet}")
    return "\n".join(lines) if lines else "No sources returned."


def chat(message, history):
    """Answer one chat turn using the indexed challenge document.

    Args:
        message: The user's question text.
        history: Previous turns passed in by ``gr.ChatInterface``. Unused:
            every question is answered independently of earlier ones.

    Returns:
        The answer text. Unless the ``SHOW_SOURCES`` environment variable is
        set to something other than ``true``, a "Sources" section produced by
        ``format_sources`` is appended. A blank message returns a short prompt
        to enter a question without querying the index.

    Raises:
        gr.Error: If the index cannot be built (``build_index`` raised
            ``RuntimeError`` or ``FileNotFoundError``); the original message
            is forwarded so it is visible in the UI.
    """
    question = (message or "").strip()
    if not question:
        # Nothing to retrieve or answer; avoid a pointless (billed) API call.
        return "Please enter a question about the challenge."

    try:
        qe = build_index()
    except (RuntimeError, FileNotFoundError) as exc:
        # Surface setup problems (missing key / document) in the chat UI
        # instead of a generic failure.
        raise gr.Error(str(exc)) from exc

    prompt = (
        f"{SYSTEM_GUARDRAILS}\n\n"
        f"User question: {question}\n"
        f"Answer using ONLY the context."
    )

    # The LLM sees the full guarded prompt, but retrieval embeds only the
    # user's question: embedding the guardrail boilerplate along with it would
    # skew the similarity search toward the instructions, not the question.
    resp = qe.query(QueryBundle(query_str=prompt, custom_embedding_strs=[question]))
    answer = str(resp).strip()

    show_sources = os.getenv("SHOW_SOURCES", "true").strip().lower() == "true"
    if show_sources:
        answer += "\n\n---\n**Sources:**\n" + format_sources(resp, max_sources=TOP_K)

    return answer


# ======================
# UI (professional layout)
# ======================
# Custom stylesheet passed to gr.Blocks(css=...). The "dds-*" class names are
# the ones used by the raw HTML snippets in the layout: the logo image, the
# header/title/subtitle block, the section cards and the muted helper text.
CSS = """
/* Layout polish */
.dds-header { display:flex; align-items:center; gap:16px; }
.dds-logo img { height:60px; width:auto; border-radius:10px; box-shadow: 0 2px 10px rgba(0,0,0,0.10); }
.dds-title { margin:0; line-height:1.1; }
.dds-subtitle { margin:6px 0 0 0; color: #555; }
.dds-card { border: 1px solid rgba(0,0,0,0.08); border-radius: 14px; padding: 14px; background: rgba(255,255,255,0.7); }
.dds-section-title { margin: 0 0 6px 0; }
.dds-muted { color: #666; font-size: 0.95rem; }
"""

def _soft_theme_or_none():
    """Return ``gr.themes.Soft()``, or ``None`` if constructing it fails."""
    try:
        return gr.themes.Soft()
    except Exception:
        return None


# Theme for the outer Blocks container (ChatInterface itself gets no theme);
# None is used as the fallback when the Soft theme cannot be created.
theme_obj = _soft_theme_or_none()

# Page assembly. Components are created in display order inside nested
# context managers: a header row (logo + title), a divider, a body row with
# the chat column (scale 6) next to the FAQ column (scale 4), a second divider
# and a closing "Admin notes" block.
with gr.Blocks(theme=theme_obj, css=CSS, title=APP_TITLE) as demo:
    # Header row (Logo left + Title right)
    with gr.Row():
        with gr.Column(scale=1, min_width=140):
            # Use HTML for reliable remote image rendering
            gr.HTML(
                f"""
                <div class="dds-logo">
                    <img src="{LOGO_URL}" alt="DDS Logo"/>
                </div>
                """
            )
        with gr.Column(scale=6):
            # APP_TITLE / APP_SUBTITLE are interpolated into the markup as-is
            # (no HTML escaping is applied here).
            gr.HTML(
                f"""
                <div class="dds-header">
                    <div>
                        <h2 class="dds-title">{APP_TITLE}</h2>
                        <p class="dds-subtitle">{APP_SUBTITLE}</p>
                        <p class="dds-muted">
                            Tip: If an answer is missing, add more official details to <b>challenge_context.txt</b> and restart the Space.
                        </p>
                    </div>
                </div>
                """
            )

    gr.Markdown("---")

    # Two professional sections
    with gr.Row():
        # Section 1: Chat
        with gr.Column(scale=6):
            # Static card describing the section; the interactive widget follows.
            gr.HTML(
                """
                <div class="dds-card">
                    <h3 class="dds-section-title">Section 1 — Ask the Copilot</h3>
                    <p class="dds-muted">RAG flow: retrieve relevant chunks → generate a grounded answer using your LLM API.</p>
                </div>
                """
            )

            # ChatInterface (NO theme kwarg here)
            # `chat` is the handler for user messages; `examples` are the
            # suggested starter questions.
            # NOTE(review): depending on the Gradio version and host, examples
            # may be cached by calling `fn` ahead of time — confirm whether
            # `cache_examples` should be set explicitly.
            gr.ChatInterface(
                fn=chat,
                examples=[
                    "What will I build in this live session?",
                    "Who is this best for?",
                    "What are the prerequisites?",
                    "What is the RAG flow in this project?"
                ],
            )

        # Section 2: FAQ
        with gr.Column(scale=4):
            gr.HTML(
                """
                <div class="dds-card">
                    <h3 class="dds-section-title">Section 2 — FAQ</h3>
                    <p class="dds-muted">Common issues + quick fixes for deployment and content quality.</p>
                </div>
                """
            )

            # Each FAQ entry is a collapsed accordion holding one Markdown
            # block; `.strip()` removes the blank first line and the indented
            # closing line of the triple-quoted literal.
            with gr.Accordion("FAQ 1 — The bot says “I don’t know”", open=False):
                gr.Markdown(
                    """
- This means the answer is **not present** in `challenge_context.txt`.
- Add the missing official content (rules, checkpoints, prizes, submission format, dates).
- Commit the updated TXT and **restart** the Space.
                    """.strip()
                )

            with gr.Accordion("FAQ 2 — OPENAI_API_KEY missing", open=False):
                gr.Markdown(
                    """
- Go to your Space → **Settings → Variables and secrets**
- Add: `OPENAI_API_KEY`
- Save (Space restarts automatically).
                    """.strip()
                )

            with gr.Accordion("FAQ 3 — Sources are not showing", open=False):
                gr.Markdown(
                    """
- Ensure `SHOW_SOURCES=true` in Space variables (or leave it unset; default is true).
- Increase `TOP_K` if you want more retrieved chunks.
                    """.strip()
                )

            with gr.Accordion("FAQ 4 — Improve answer quality", open=False):
                gr.Markdown(
                    """
- Add more structured content into your TXT (headings + bullet points).
- Keep each checkpoint/rule as a clear section.
- Increase `TOP_K` slightly (e.g., 4–6) if context is larger.
                    """.strip()
                )

            with gr.Accordion("FAQ 5 — App fails on startup", open=False):
                gr.Markdown(
                    """
- Check Space logs.
- Most common causes:
  - Missing `challenge_context.txt` in repo
  - Missing `OPENAI_API_KEY`
  - Dependency mismatch (simplify `requirements.txt`)
                    """.strip()
                )

    gr.Markdown("---")
    # Footer: quick reference of the file and environment variables read by
    # this module.
    gr.Markdown(
        """
**Admin notes**
- Context file: `challenge_context.txt`
- Model env vars: `OPENAI_MODEL`, `OPENAI_EMBED_MODEL`
- Retrieval env vars: `TOP_K`
- Sources toggle: `SHOW_SOURCES=true|false`
        """.strip()
    )

# Launch the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    demo.launch()