Spaces:

decodingdatascience
/

Challengebot

Running

App Files Files Community

Challengebot / app.py

decodingdatascience

Update app.py

336d701 verified 1 day ago

raw

history blame contribute delete

3.54 kB

	import os
	from pathlib import Path
	import gradio as gr

	from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
	from llama_index.llms.openai import OpenAI
	from llama_index.embeddings.openai import OpenAIEmbedding

	# ---- Configuration (each value can be overridden through an environment variable) ----
	# Chat model name handed to the OpenAI LLM wrapper.
	MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
	# Embedding model name handed to the OpenAI embedding wrapper.
	EMBED_MODEL = os.environ.get("OPENAI_EMBED_MODEL", "text-embedding-3-small")
	# Number of chunks retrieved per query; also caps how many sources are listed.
	TOP_K = int(os.environ.get("TOP_K", "3"))
	# The single document that gets indexed.
	DOC_PATH = Path(os.environ.get("DOC_PATH", "challenge_context.txt"))

	# Instruction text prepended to every user question before it is sent to the query engine.
	SYSTEM_GUARDRAILS = " ".join(
	    [
	        "You are Challenge Copilot. Answer ONLY using the provided context.",
	        "If the answer is not in the context, say: 'I don’t know based on the current document.'",
	        "Then ask the user to add the missing official details to challenge_context.txt.",
	    ]
	)

	# Lazily populated by build_index(); None until the first successful build.
	_INDEX = None
	_QUERY_ENGINE = None

	def build_index():
	    """Build (once) and return the query engine over the challenge document.

	    The engine is cached in the module-level ``_QUERY_ENGINE``; the document is
	    only loaded and indexed on the first call, later calls return the cache.

	    Returns:
	        The query engine produced by ``VectorStoreIndex.as_query_engine`` with
	        ``similarity_top_k=TOP_K``.

	    Raises:
	        RuntimeError: if ``OPENAI_API_KEY`` is not set in the environment.
	        FileNotFoundError: if no document could be loaded from ``DOC_PATH``.
	    """
	    global _INDEX, _QUERY_ENGINE
	    if _QUERY_ENGINE is not None:
	        return _QUERY_ENGINE

	    if not os.getenv("OPENAI_API_KEY"):
	        raise RuntimeError(
	            "OPENAI_API_KEY is missing. Add it in the Space Settings → Variables and secrets."
	        )

	    if not DOC_PATH.exists():
	        # Seed a placeholder so the app can start before the real content is added.
	        # DOC_PATH is env-configurable, so its directory may not exist yet.
	        DOC_PATH.parent.mkdir(parents=True, exist_ok=True)
	        DOC_PATH.write_text(
	            "Add the official Building AI Application Challenge content here.\n",
	            encoding="utf-8",
	        )

	    Settings.llm = OpenAI(model=MODEL, temperature=0.2)
	    Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL)
	    Settings.chunk_size = 800
	    Settings.chunk_overlap = 120

	    # Load exactly the configured file. Scanning the parent directory for
	    # "*.txt" would also parse unrelated text files (e.g. requirements.txt)
	    # and could never find DOC_PATH when it has a different extension.
	    docs = SimpleDirectoryReader(input_files=[str(DOC_PATH)]).load_data()
	    if not docs:
	        raise FileNotFoundError(f"Could not load {DOC_PATH.name}. Make sure it exists in the repo.")

	    _INDEX = VectorStoreIndex.from_documents(docs)
	    _QUERY_ENGINE = _INDEX.as_query_engine(similarity_top_k=TOP_K)
	    return _QUERY_ENGINE

	def format_sources(resp, max_sources=3, max_chars=220):
	    """Render the source nodes attached to a response as a numbered list.

	    Args:
	        resp: Response object. Its ``source_nodes`` attribute is read if
	            present; each entry must expose ``.node.metadata`` (a mapping),
	            ``.node.get_content()`` and, optionally, ``.score``.
	        max_sources: Maximum number of sources to list.
	        max_chars: Maximum snippet length; longer text is cut and marked
	            with a trailing "...".

	    Returns:
	        One line per source in the form
	        ``"<n>. <file_name>[ (score=x.xxx)]: <snippet>"`` joined by newlines,
	        or ``"No sources returned."`` when there is nothing to list.
	    """
	    # ``source_nodes`` may be absent or None; both mean "no sources".
	    nodes = getattr(resp, "source_nodes", None) or []
	    lines = []
	    for i, sn in enumerate(nodes[:max_sources], start=1):
	        fn = sn.node.metadata.get("file_name", "unknown")
	        text = sn.node.get_content().replace("\n", " ").strip()
	        # Only add the ellipsis when the text was actually truncated.
	        snippet = text[:max_chars] + "..." if len(text) > max_chars else text
	        score = getattr(sn, "score", None)
	        score_txt = f" (score={score:.3f})" if isinstance(score, (float, int)) else ""
	        lines.append(f"{i}. {fn}{score_txt}: {snippet}")
	    return "\n".join(lines) if lines else "No sources returned."

	def chat(message, history):
	    """Answer one user message from the indexed document (Gradio chat callback).

	    Args:
	        message: The user's question text.
	        history: Prior conversation turns supplied by the chat UI; unused,
	            every question is answered independently.

	    Returns:
	        The answer text, followed by a "Sources" section unless the
	        ``SHOW_SOURCES`` environment variable disables it. A short prompt is
	        returned instead when the message is empty.

	    Raises:
	        Whatever ``build_index()`` or the query engine raises.
	    """
	    question = (message or "").strip()
	    if not question:
	        # Don't spend an embedding + LLM call on an empty submission.
	        return "Please enter a question about the challenge."

	    qe = build_index()
	    # NOTE(review): the guardrail text is part of the query string, so it is
	    # presumably embedded for retrieval as well — consider moving it to a
	    # system prompt on the LLM instead.
	    prompt = (
	        f"{SYSTEM_GUARDRAILS}\n\n"
	        f"User question: {question}\n"
	        f"Answer using ONLY the context."
	    )
	    resp = qe.query(prompt)
	    answer = str(resp).strip()

	    # Accept the usual truthy spellings and ignore stray whitespace/case so a
	    # value such as "True " or "1" does not silently turn sources off.
	    flag = os.getenv("SHOW_SOURCES", "true").strip().lower()
	    if flag in {"true", "1", "yes", "on"}:
	        answer += "\n\n---\nSources:\n" + format_sources(resp, max_sources=TOP_K)

	    return answer


	# ---- UI ----
	# Use the "Soft" theme when it can be constructed; otherwise pass None.
	try:
	    theme_obj = gr.themes.Soft()
	except Exception:
	    theme_obj = None  # compatibility fallback

	# Starter questions shown as clickable examples in the chat widget.
	_EXAMPLE_QUESTIONS = [
	    "What will I build in this live session?",
	    "Who is this best for?",
	    "What are the prerequisites?",
	]

	# Page layout: title, one-line description, then the chat widget bound to chat().
	with gr.Blocks(theme=theme_obj) as demo:
	    gr.Markdown("# Challenge Copilot — RAG Q&A Bot")
	    gr.Markdown(
	        "Ask questions about the Building AI Application Challenge using challenge_context.txt (LlamaIndex + OpenAI)."
	    )
	    gr.ChatInterface(fn=chat, examples=_EXAMPLE_QUESTIONS)

	# Start the web server only when executed as a script.
	if __name__ == "__main__":
	    demo.launch()