"""Challenge Copilot: a Gradio RAG Q&A app over challenge_context.txt (LlamaIndex + OpenAI)."""

import os
from pathlib import Path

import gradio as gr
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Configuration, overridable via environment variables.
MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
EMBED_MODEL = os.getenv("OPENAI_EMBED_MODEL", "text-embedding-3-small")
TOP_K = int(os.getenv("TOP_K", "3"))
DOC_PATH = Path(os.getenv("DOC_PATH", "challenge_context.txt"))

# Guardrail instructions prepended to every query so the model stays grounded.
SYSTEM_GUARDRAILS = (
    "You are Challenge Copilot. Answer ONLY using the provided context. "
    "If the answer is not in the context, say: 'I don’t know based on the current document.' "
    "Then ask the user to add the missing official details to challenge_context.txt."
)

# Module-level singletons: the index is built once per process and reused.
_INDEX = None
_QUERY_ENGINE = None


def build_index():
    """Build (or return the cached) query engine over DOC_PATH."""
    global _INDEX, _QUERY_ENGINE
    if _QUERY_ENGINE is not None:
        return _QUERY_ENGINE

    # Fail fast with an actionable message if the API key is not configured.
    if not os.getenv("OPENAI_API_KEY"):
        raise RuntimeError(
            "OPENAI_API_KEY is missing. Add it in the Space Settings → Variables and secrets."
        )

    # Seed a placeholder document so the app still boots without content.
    if not DOC_PATH.exists():
        DOC_PATH.write_text(
            "Add the official Building AI Application Challenge content here.\n",
            encoding="utf-8",
        )

    Settings.llm = OpenAI(model=MODEL, temperature=0.2)
    Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL)
    Settings.chunk_size = 800
    Settings.chunk_overlap = 120

    # Load only the target file from its directory.
    data_dir = str(DOC_PATH.parent)
    docs = SimpleDirectoryReader(
        input_dir=data_dir, required_exts=[".txt"], recursive=False
    ).load_data()
    docs = [d for d in docs if d.metadata.get("file_name") == DOC_PATH.name]
    if not docs:
        raise FileNotFoundError(
            f"Could not load {DOC_PATH.name}. Make sure it exists in the repo."
        )

    _INDEX = VectorStoreIndex.from_documents(docs)
    _QUERY_ENGINE = _INDEX.as_query_engine(similarity_top_k=TOP_K)
    return _QUERY_ENGINE


def format_sources(resp, max_sources=3, max_chars=220):
    """Render up to max_sources retrieved chunks as numbered, truncated citations."""
    lines = []
    for i, sn in enumerate(getattr(resp, "source_nodes", [])[:max_sources], start=1):
        fn = sn.node.metadata.get("file_name", "unknown")
        snippet = sn.node.get_content().replace("\n", " ").strip()[:max_chars]
        score = getattr(sn, "score", None)
        score_txt = f" (score={score:.3f})" if isinstance(score, (float, int)) else ""
        lines.append(f"{i}. {fn}{score_txt}: {snippet}...")
    return "\n".join(lines) if lines else "No sources returned."


def chat(message, history):
    qe = build_index()
    # Prepend the guardrails so the model answers only from retrieved context.
    prompt = (
        f"{SYSTEM_GUARDRAILS}\n\n"
        f"User question: {message}\n"
        f"Answer using ONLY the context."
    )
    resp = qe.query(prompt)
    answer = str(resp).strip()
    show_sources = os.getenv("SHOW_SOURCES", "true").lower() == "true"
    if show_sources:
        answer += "\n\n---\nSources:\n" + format_sources(resp, max_sources=TOP_K)
    return answer


# ---- UI ----
try:
    theme_obj = gr.themes.Soft()
except Exception:
    theme_obj = None  # compatibility fallback for older Gradio versions

with gr.Blocks(theme=theme_obj) as demo:
    gr.Markdown("# Challenge Copilot — RAG Q&A Bot")
    gr.Markdown(
        "Ask questions about the Building AI Application Challenge using "
        "challenge_context.txt (LlamaIndex + OpenAI)."
    )
    gr.ChatInterface(
        fn=chat,
        examples=[
            "What will I build in this live session?",
            "Who is this best for?",
            "What are the prerequisites?",
        ],
    )

if __name__ == "__main__":
    demo.launch()
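

# ---- Optional: persist the index between restarts (a sketch, not wired in above) ----
# Rebuilding embeddings on every cold start spends OpenAI API calls. LlamaIndex can
# save a built index to disk and reload it later. The "./storage" directory below is
# an assumed location, and load_or_build_index is a hypothetical helper; Settings.llm
# and Settings.embed_model must still be configured before querying a reloaded index.
#
# from llama_index.core import StorageContext, load_index_from_storage
#
# def load_or_build_index(persist_dir: str = "./storage"):
#     if Path(persist_dir).exists():
#         # Reload the previously persisted index instead of re-embedding the document.
#         storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
#         return load_index_from_storage(storage_context)
#     docs = SimpleDirectoryReader(
#         input_dir=str(DOC_PATH.parent), required_exts=[".txt"], recursive=False
#     ).load_data()
#     index = VectorStoreIndex.from_documents(docs)
#     index.storage_context.persist(persist_dir=persist_dir)
#     return index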