# NLP-RAG / retriever / generator.py
# Author: Qar-Raz
# hf-space: deploy branch without frontend/data/results (commit c7256ee)
# Changed the prompt to output as Markdown, plus some formatting details.
# Also added get_answer_stream for incremental token rendering on the frontend.
# -- @Qamar
class RAGGenerator:
    """Builds a CBT-assistant RAG prompt from retrieved contexts and delegates
    answer generation (sync or streaming) to a pluggable model instance."""

    def generate_prompt(self, query, retrieved_contexts, context_urls=None):
        """Assemble the full grounded prompt string.

        Args:
            query: The user's question.
            retrieved_contexts: Iterable of retrieved textbook excerpts.
            context_urls: Optional list of source URLs parallel to
                retrieved_contexts. NOTE(review): if it is shorter than
                retrieved_contexts, zip() silently drops the extra excerpts —
                callers should pass equal-length lists.

        Returns:
            A single prompt string with numbered [Source N] excerpts so the
            model can emit citations that map back to the sources.
        """
        if context_urls:
            context_text = "\n\n".join(
                f"[Source {i+1}] {url}: {c}"
                for i, (c, url) in enumerate(zip(retrieved_contexts, context_urls))
            )
        else:
            context_text = "\n\n".join(
                f"[Source {i+1}]: {c}" for i, c in enumerate(retrieved_contexts)
            )
        # Fixed typo in the prompt header: "RETRIVED" -> "RETRIEVED".
        return f"""You are a specialized Cognitive Behavioral Therapy (CBT) assistant. Your task is to provide accurate, clinical, and structured answers based ONLY on the provided textbook excerpts.
INSTRUCTIONS:
1. Use the provided Sources to answer the question.
2. CITATIONS: You must cite the sources used in your answer (e.g., "CBT is based on the cognitive model [Source 1]").
3. FORMAT: Use clear headers and bullet points for complex explanations.
4. GROUNDING: If the sources do not contain the answer, explicitly state: "The provided excerpts from the textbook do not contain information to answer this specific question." Do not use your own internal knowledge.
5. TONE: Maintain a professional, empathetic, and academic tone.
RETRIEVED TEXTBOOK CONTEXT:
{context_text}
USER QUESTION: {query}
ACADEMIC ANSWER (WITH CITATIONS):"""

    def get_answer(self, model_instance, query, retrieved_contexts, context_urls=None, **kwargs):
        """Uses a specific model instance to generate the final answer.

        model_instance must expose generate(prompt, **kwargs); extra kwargs
        are forwarded unchanged.
        """
        prompt = self.generate_prompt(query, retrieved_contexts, context_urls)
        return model_instance.generate(prompt, **kwargs)

    def get_answer_stream(self, model_instance, query, retrieved_contexts, context_urls=None, **kwargs):
        """Streams model output token-by-token for incremental UI updates.

        Yields non-empty tokens from model_instance.generate_stream when the
        wrapper provides it; otherwise falls back to a single yield of the
        full synchronous answer.
        """
        prompt = self.generate_prompt(query, retrieved_contexts, context_urls)
        if hasattr(model_instance, "generate_stream"):
            for token in model_instance.generate_stream(prompt, **kwargs):
                if token:  # skip empty/None tokens some backends emit
                    yield token
            return
        # Fallback for model wrappers that only expose sync generation.
        answer = model_instance.generate(prompt, **kwargs)
        if answer:
            yield answer