import os
from dotenv import load_dotenv
from config_loader import cfg

# Optimized imports - only what we need for retrieval and generation
from data.vector_db import get_index_by_name, load_chunks_from_pinecone  # new helper
from retriever.retriever import HybridRetriever
from retriever.generator import RAGGenerator
from retriever.processor import ChunkProcessor
from retriever.evaluator import RAGEvaluator

# Model fleet
from models.llama_3_8b import Llama3_8B
from models.mistral_7b import Mistral_7b
from models.qwen_2_5 import Qwen2_5
from models.deepseek_v3 import DeepSeek_V3
from models.tiny_aya import TinyAya

MODEL_MAP = {
    "Llama-3-8B": Llama3_8B,
    "Mistral-7B": Mistral_7b,
    "Qwen-2.5": Qwen2_5,
    "DeepSeek-V3": DeepSeek_V3,
    "TinyAya": TinyAya,
}

load_dotenv()


def main():
    hf_token = os.getenv("HF_TOKEN")
    pinecone_key = os.getenv("PINECONE_API_KEY")
    query = "How do transformers handle long sequences?"

    # 1. Connect to the existing index (no creation, no uploading).
    #    The index name is the slugified base name plus the chunking technique.
    index_name = f"{cfg.db['base_index_name']}-{cfg.processing['technique']}"
    index = get_index_by_name(pinecone_key, index_name)

    # 2. Set up the processor (provides the encoder/embedding model).
    proc = ChunkProcessor(model_name=cfg.processing['embedding_model'])

    # 3. Load the BM25 corpus (the "source of truth").
    #    This replaces the entire data_loader/chunking block.
    #    Note: as written, every run hits Pinecone; a pickle cache would make
    #    reruns near-instant (see the sketch at the bottom of this file).
    print("šŸ”„ Loading BM25 context from Pinecone metadata...")
    final_chunks = load_chunks_from_pinecone(index)

    # 4. Retrieval setup
    retriever = HybridRetriever(final_chunks, proc.encoder)
    print(f"šŸ”Ž Searching via {cfg.retrieval['mode']} mode...")
    context_chunks = retriever.search(
        query,
        index,
        mode=cfg.retrieval['mode'],
        rerank_strategy=cfg.retrieval['rerank_strategy'],
        use_mmr=cfg.retrieval['use_mmr'],
        top_k=cfg.retrieval['top_k'],
        final_k=cfg.retrieval['final_k'],
    )

    # 5. Initialize the contestants
    rag_engine = RAGGenerator()
    models = {name: MODEL_MAP[name](token=hf_token) for name in cfg.model_list}
    evaluator = RAGEvaluator(
        judge_model=cfg.gen['judge_model'],
        embedding_model=proc.encoder,
        api_key=os.getenv("GROQ_API_KEY"),
    )

    tournament_results = {}

    # 6. Tournament loop
    for name, model_inst in models.items():
        print(f"\nšŸ† Tournament: {name} is generating...")
        try:
            # Generation
            answer = rag_engine.get_answer(
                model_inst,
                query,
                context_chunks,
                temperature=cfg.gen['temperature'],
            )

            # Faithfulness: is the answer grounded in the retrieved context?
            faith = evaluator.evaluate_faithfulness(answer, context_chunks)

            # Relevancy: does the answer actually address the query?
            rel = evaluator.evaluate_relevancy(query, answer)

            tournament_results[name] = {
                "Answer": answer[:100] + "...",  # preview only
                "Faithfulness": faith['score'],
                "Relevancy": rel['score'],
            }
            print(f"āœ… {name} Score - Faith: {faith['score']} | Rel: {rel['score']}")
        except Exception as e:
            print(f"āŒ Error evaluating {name}: {e}")

    print("\n--- Final Tournament Standings ---")
    for name, scores in tournament_results.items():
        print(f"{name}: F={scores['Faithfulness']}, R={scores['Relevancy']}")


if __name__ == "__main__":
    main()
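
# --- Reference: config keys this script reads (example values are illustrative
# guesses, not taken from the actual config). Whatever file format config_loader
# parses, cfg must expose these sections and keys:
#
#   cfg.db['base_index_name']          e.g. "rag-papers"
#   cfg.processing['technique']        e.g. "semantic" (suffixed onto the index name)
#   cfg.processing['embedding_model']  e.g. "sentence-transformers/all-MiniLM-L6-v2"
#   cfg.retrieval['mode'] / ['rerank_strategy'] / ['use_mmr'] / ['top_k'] / ['final_k']
#   cfg.gen['judge_model'] / ['temperature']
#   cfg.model_list                     a subset of the MODEL_MAP keys above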
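
# --- Optional: pickle cache for step 3 (a minimal sketch, not wired in) ---
# The step-3 note suggests caching the chunk corpus locally so reruns skip the
# Pinecone round-trip. This assumes the chunks returned by
# load_chunks_from_pinecone() are picklable; load_chunks_cached and cache_path
# are hypothetical names introduced here for illustration.
#
#   import pickle
#   from pathlib import Path
#
#   def load_chunks_cached(index, cache_path="chunks_cache.pkl"):
#       path = Path(cache_path)
#       if path.exists():
#           with path.open("rb") as f:
#               return pickle.load(f)              # fast path: local cache hit
#       chunks = load_chunks_from_pinecone(index)  # slow path: Pinecone fetch
#       with path.open("wb") as f:
#           pickle.dump(chunks, f)
#       return chunks
#
# To try it, swap the step-3 call: final_chunks = load_chunks_cached(index)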