# NLP-RAG / config.yaml
# Qar-Raz's picture
# hf-space: deploy branch without frontend/data/results
# c7256ee
# ------------------------------------------------------------------
# RAG CBT QUESTION-ANSWERING SYSTEM CONFIGURATION
# ------------------------------------------------------------------
# Project identity and corpus scope.
project:
  name: "cbt-rag-system"
  category: "psychology"
  doc_limit: null  # Load all pages from the book
# Embedding and chunking settings.
processing:
  # Embedding model used for both the vector DB and evaluator similarity.
  embedding_model: "jinaai/jina-embeddings-v2-small-en"
  # Options: sentence, recursive, semantic, fixed
  technique: "recursive"
  # Jina supports 8192 tokens (~32k chars); 1000-char chunks are used for
  # better context.
  chunk_size: 1000
  chunk_overlap: 100
# Vector index settings.
vector_db:
  base_index_name: "cbt-book"
  dimension: 512  # Jina outputs 512 dimensions
  metric: "cosine"
  batch_size: 50  # Reduced batch size for CPU processing
# Retrieval and reranking settings.
retrieval:
  # Options: hybrid, semantic, bm25
  mode: "hybrid"
  # Options: cross-encoder, rrf
  rerank_strategy: "cross-encoder"
  use_mmr: true
  # NOTE(review): presumably top_k candidates are fetched and final_k are kept
  # after reranking — confirm against the retriever code.
  top_k: 10
  final_k: 5
# Text generation settings.
generation:
  # NOTE(review): originally written as `0.`; spelled out as 0.0 (same float
  # value) — confirm no trailing digit was lost.
  temperature: 0.0
  max_new_tokens: 512
# The model used to judge the others (OpenRouter).
# NOTE(review): whether this key belongs at the top level or under
# `generation` could not be determined from this view — confirm against the
# config loader.
judge_model: "stepfun/step-3.5-flash:free"
# Contestant models taking part in the tournament.
models:
  - "Llama-3-8B"
  - "Mistral-7B"
  - "Qwen-2.5"
  - "DeepSeek-V3"
  - "TinyAya"