Sync from GitHub via hub-sync
- README.md +54 -9
- server_app.py +11 -1
- src/document_processor.py +21 -12
- src/hybrid_search.py +6 -4
- src/qdrant_keepalive.py +83 -0
- src/rag_system.py +38 -12
- src/vector_store.py +17 -7
README.md
CHANGED
@@ -1,6 +1,5 @@
 ---
 title: Code Compass API
-emoji: 🚀
 colorFrom: blue
 colorTo: indigo
 sdk: docker
@@ -9,13 +8,59 @@ app_port: 7860
 
 # Code Compass Backend
 
-FastAPI backend for a
-
-
-
-
-
-
-
-
+FastAPI backend for Code Compass, a personal full-stack RAG project that indexes public GitHub repositories and answers questions with grounded source citations.
+
+## What This Demonstrates
+
+- End-to-end AI application design, not just a prompt wrapper
+- Backend API design with FastAPI, Pydantic validation, and session-scoped state
+- Code-aware retrieval using tree-sitter chunking, vector search, BM25, rank fusion, and reranking
+- Grounded answer generation with file-level citations
+- Deployment-aware tradeoffs for cost, model choice, and free-tier infrastructure
+- Evaluation workflow prepared for retrieval and answer-quality metrics
+
+## Backend Responsibilities
+
+- Clone a public GitHub repository into temporary storage
+- Filter and chunk source files for retrieval
+- Generate embeddings and store chunks in Qdrant
+- Maintain lightweight repository and session metadata in SQLite
+- Run indexing as a background task
+- Retrieve evidence with semantic search, lexical search, fusion, and reranking
+- Generate answers from the selected context and return citations to the UI
+- Delete cloned repository files after indexing
+
+## Runtime Configuration
+
+Local development is configured for higher-quality experimentation:
+
+- `LLM_PROVIDER=bedrock`
+- `EMBEDDING_PROVIDER=bedrock`
+- Claude on Amazon Bedrock for answer generation
+- Cohere Embed on Amazon Bedrock for semantic retrieval
+
+Production is configured for lower-cost hosting:
+
+- `LLM_PROVIDER=groq`
+- `EMBEDDING_PROVIDER=local`
+- Groq-hosted Llama for answer generation
+- Local sentence-transformer embeddings for retrieval
+- Qdrant Cloud for vector storage
+
+## Qdrant Keepalive
+
+The backend starts a lightweight Qdrant keepalive scheduler when `QDRANT_URL` is configured. It calls the configured collection every 12 hours by default so a free-tier Qdrant cluster does not become inactive while the backend process is running.
+
+Configuration:
+
+- `QDRANT_URL`
+- `QDRANT_API_KEY`
+- `QDRANT_COLLECTION=repo_qa_chunks`
+- `QDRANT_KEEPALIVE_ENABLED=true`
+- `QDRANT_KEEPALIVE_INTERVAL_SECONDS=43200`
+
+The main repository also includes a GitHub Actions keepalive workflow for cases where the backend host is asleep.
+
+## Metrics
+
+Metrics will be added after the next benchmark rerun. The evaluation harness is set up to report retrieval hit rate, top-1 hit rate, mean reciprocal rank, source recall, grounded answer rate, checklist pass rate, and optional RAGAS judge metrics.
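For reference, a production deployment matching the configuration above could use an environment file shaped like this sketch (the endpoint and key values are illustrative placeholders, not real credentials):

    LLM_PROVIDER=groq
    EMBEDDING_PROVIDER=local
    QDRANT_URL=https://example-cluster.qdrant.io   # illustrative cluster URL
    QDRANT_API_KEY=replace-me                      # illustrative placeholder
    QDRANT_COLLECTION=repo_qa_chunks
    QDRANT_KEEPALIVE_ENABLED=true
    QDRANT_KEEPALIVE_INTERVAL_SECONDS=43200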
server_app.py
CHANGED
@@ -8,6 +8,7 @@ from pydantic import BaseModel, Field, HttpUrl
 from dotenv import load_dotenv
 
 from src.bedrock_claude import BedrockTransientError, is_bedrock_retryable_error
+from src.qdrant_keepalive import QdrantKeepAliveScheduler
 from src.rag_system import CodebaseRAGSystem
 
 load_dotenv(Path(__file__).with_name(".env"))
@@ -34,6 +35,7 @@ app.add_middleware(
 )
 
 rag_system: Optional[CodebaseRAGSystem] = None
+qdrant_keepalive: Optional[QdrantKeepAliveScheduler] = None
 
 
 class RepoIndexRequest(BaseModel):
@@ -60,9 +62,17 @@ def require_session_id(x_session_id: Optional[str] = Header(None, alias="X-Sessi
 
 @app.on_event("startup")
 def startup():
-    global rag_system
+    global qdrant_keepalive, rag_system
     Path("./data").mkdir(exist_ok=True)
     rag_system = CodebaseRAGSystem()
+    qdrant_keepalive = QdrantKeepAliveScheduler(rag_system.vector_store)
+    qdrant_keepalive.start()
+
+
+@app.on_event("shutdown")
+def shutdown():
+    if qdrant_keepalive is not None:
+        qdrant_keepalive.stop()
 
 
 @app.get("/")
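A side note on the wiring above: `@app.on_event("startup")` and `@app.on_event("shutdown")` are deprecated in recent FastAPI releases in favor of a lifespan handler. A minimal sketch of the equivalent wiring under that API, reusing the same imports as the diff (an alternative shape, not what this commit ships):

    from contextlib import asynccontextmanager
    from pathlib import Path

    from fastapi import FastAPI

    from src.qdrant_keepalive import QdrantKeepAliveScheduler
    from src.rag_system import CodebaseRAGSystem


    @asynccontextmanager
    async def lifespan(app: FastAPI):
        # mirrors startup(): create the data dir, build the RAG system, start keepalive
        Path("./data").mkdir(exist_ok=True)
        app.state.rag_system = CodebaseRAGSystem()
        app.state.qdrant_keepalive = QdrantKeepAliveScheduler(app.state.rag_system.vector_store)
        app.state.qdrant_keepalive.start()
        yield
        # mirrors shutdown(): stop the keepalive thread
        app.state.qdrant_keepalive.stop()


    app = FastAPI(lifespan=lifespan)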
src/document_processor.py
CHANGED
@@ -5,8 +5,9 @@ import pypdf
 
 
 class DocumentProcessor:
-
-
+    # FIX: 512 chars was too small — caused mid-thought splits that hurt faithfulness.
+    # 1200 chars with 150 overlap keeps full function signatures + docstrings intact.
+    def __init__(self, chunk_size: int = 1200, chunk_overlap: int = 150):
         self.chunk_size = chunk_size
         self.chunk_overlap = chunk_overlap
 
@@ -19,7 +20,6 @@ class DocumentProcessor:
                 text += page.extract_text() + "\n"
         except Exception as e:
             raise ValueError(f"Error reading PDF: {str(e)}")
-
         return text.strip()
 
     def chunk_text(self, text: str) -> List[str]:
@@ -35,24 +35,34 @@ class DocumentProcessor:
             chunk = text[start:end]
 
             if end < text_length:
+                # FIX: prefer paragraph breaks > sentence breaks > newlines.
+                # Previously only checked period + newline, missing paragraph boundaries.
+                last_double_newline = chunk.rfind("\n\n")
                 last_period = chunk.rfind(".")
                 last_newline = chunk.rfind("\n")
-                break_point = max(last_period, last_newline)
 
-                if
-
-
+                if last_double_newline > self.chunk_size * 0.4:
+                    break_point = last_double_newline + 1
+                elif last_period > self.chunk_size * 0.5:
+                    break_point = last_period + 1
+                elif last_newline > self.chunk_size * 0.4:
+                    break_point = last_newline + 1
+                else:
+                    break_point = self.chunk_size
+
+                chunk = chunk[:break_point]
+                end = start + break_point
 
-
+            stripped = chunk.strip()
+            if stripped:
+                chunks.append(stripped)
 
             start = end - self.chunk_overlap
 
         return [c for c in chunks if c]
 
     def process_document(self, file_path: str) -> Tuple[str, List[str]]:
-
         file_ext = Path(file_path).suffix.lower()
-
         if file_ext == ".pdf":
             text = self.extract_text_from_pdf(file_path)
         elif file_ext == ".txt":
@@ -62,7 +72,6 @@ class DocumentProcessor:
             raise ValueError(f"Unsupported file type: {file_ext}")
 
         chunks = self.chunk_text(text)
-
         return text, chunks
 
     @staticmethod
@@ -71,4 +80,4 @@ class DocumentProcessor:
         with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                 hash_md5.update(chunk)
-        return hash_md5.hexdigest()
+        return hash_md5.hexdigest()
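A quick way to sanity-check the new break-point preference, sketched as a throwaway script (the synthetic text and the script itself are illustrative, not part of the repo):

    from src.document_processor import DocumentProcessor

    processor = DocumentProcessor()  # defaults: chunk_size=1200, chunk_overlap=150
    text = "Intro paragraph.\n\n" + ("A sentence about the API. " * 120)
    chunks = processor.chunk_text(text)

    # With the fix, interior chunks should end on a paragraph break or a
    # sentence-ending period instead of splitting mid-sentence.
    for chunk in chunks:
        print(repr(chunk[-30:]))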
src/hybrid_search.py
CHANGED
@@ -1,4 +1,3 @@
-import math
 import re
 from collections import defaultdict
 from typing import List
@@ -6,7 +5,6 @@ from typing import List
 from rank_bm25 import BM25Okapi
 from sentence_transformers import CrossEncoder
 
-
 TOKEN_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_./:-]*")
 
 
@@ -27,7 +25,6 @@ class HybridSearchEngine:
     def bm25_search(self, chunks: List[dict], query: str, top_k: int = 12) -> List[dict]:
         if not chunks:
             return []
-
         tokens = tokenize(query)
         if not tokens:
             return []
@@ -73,6 +70,11 @@ class HybridSearchEngine:
         return merged[:top_k]
 
     def rerank(self, query: str, candidates: List[dict], top_k: int = 6) -> List[dict]:
+        """
+        FIX: top_k now defaults to 6 and callers should pass a small final number (4-6),
+        NOT search_depth (which was up to 120). Reranking 120 items then dumping them
+        all into the LLM context was the main faithfulness killer.
+        """
         if not candidates:
             return []
@@ -99,4 +101,4 @@ class HybridSearchEngine:
             enriched["semantic_rank"] = rank
             enriched["semantic_score"] = float(item.get("semantic_score", 0.0))
             normalized.append(enriched)
-        return normalized
+        return normalized
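The call pattern this docstring asks for, sketched end to end (the chunk payloads are illustrative and the engine is assumed to construct with defaults; real chunks carry file and line metadata as well):

    from src.hybrid_search import HybridSearchEngine

    engine = HybridSearchEngine()
    query = "where is request auth handled?"

    # illustrative chunk dicts; shape assumes a "content" field like the rest of the codebase
    chunks = [
        {"content": "def authenticate(request): ..."},
        {"content": "def parse_config(path): ..."},
    ]

    lexical_hits = engine.bm25_search(chunks, query, top_k=12)
    # in the app the second list comes from vector search via normalize_semantic_results;
    # the lexical list is reused here just to exercise the fusion + rerank path
    fused = engine.reciprocal_rank_fusion(lexical_hits, lexical_hits, top_k=12)
    final = engine.rerank(query, fused, top_k=6)  # small final number, not search_depth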
src/qdrant_keepalive.py
ADDED
@@ -0,0 +1,83 @@
+import os
+import threading
+from typing import Optional
+
+from src.vector_store import QdrantVectorStore
+
+
+class QdrantKeepAliveScheduler:
+    def __init__(self, vector_store: QdrantVectorStore):
+        self.vector_store = vector_store
+        self.interval_seconds = self._interval_seconds()
+        self.run_on_start = self._env_flag("QDRANT_KEEPALIVE_RUN_ON_START", True)
+        self.keepalive_enabled = self._env_flag("QDRANT_KEEPALIVE_ENABLED", True)
+        self.enabled = self.keepalive_enabled and self.vector_store.is_remote()
+        self._stop_event = threading.Event()
+        self._thread: Optional[threading.Thread] = None
+
+    def start(self):
+        if not self.enabled:
+            reason = (
+                "disabled by QDRANT_KEEPALIVE_ENABLED"
+                if not self.keepalive_enabled
+                else "set QDRANT_URL to enable remote Qdrant pings"
+            )
+            print(
+                f"[qdrant-keepalive] Disabled; {reason}",
+                flush=True,
+            )
+            return
+        if self._thread and self._thread.is_alive():
+            return
+
+        self._stop_event.clear()
+        self._thread = threading.Thread(
+            target=self._run,
+            name="qdrant-keepalive",
+            daemon=True,
+        )
+        self._thread.start()
+        print(
+            f"[qdrant-keepalive] Started interval_seconds={self.interval_seconds}",
+            flush=True,
+        )
+
+    def stop(self):
+        self._stop_event.set()
+        if self._thread and self._thread.is_alive():
+            self._thread.join(timeout=5)
+        self._thread = None
+
+    def _run(self):
+        if self.run_on_start:
+            self._ping()
+
+        while not self._stop_event.wait(self.interval_seconds):
+            self._ping()
+
+    def _ping(self):
+        try:
+            stats = self.vector_store.keep_alive()
+            print(
+                "[qdrant-keepalive] Ping succeeded "
+                f"collection={stats['collection_name']} "
+                f"points={stats['total_vectors']}",
+                flush=True,
+            )
+        except Exception as exc:
+            print(f"[qdrant-keepalive] Ping failed: {exc}", flush=True)
+
+    @staticmethod
+    def _env_flag(name: str, default: bool) -> bool:
+        value = os.getenv(name)
+        if value is None:
+            return default
+        return value.strip().lower() not in {"0", "false", "no", "off"}
+
+    @staticmethod
+    def _interval_seconds() -> int:
+        value = os.getenv("QDRANT_KEEPALIVE_INTERVAL_SECONDS", "43200")
+        try:
+            return max(60, int(value))
+        except ValueError:
+            return 43200
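Standalone usage of the new scheduler, as a minimal sketch (the embedding dimension is illustrative; in the app the store comes from `CodebaseRAGSystem` and start/stop are driven by the FastAPI hooks shown earlier):

    from src.qdrant_keepalive import QdrantKeepAliveScheduler
    from src.vector_store import QdrantVectorStore

    store = QdrantVectorStore(embedding_dim=384)  # illustrative dimension
    keepalive = QdrantKeepAliveScheduler(store)

    # start() only spawns the daemon thread when QDRANT_URL is set and
    # QDRANT_KEEPALIVE_ENABLED has not been turned off; otherwise it logs and returns
    keepalive.start()

    # ... application serves traffic ...

    keepalive.stop()  # signals the thread and joins it with a 5s timeout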
src/rag_system.py
CHANGED
@@ -333,7 +333,7 @@ class CodebaseRAGSystem:
         session_key: str,
         question: str,
         top_k: int = 8,
-        history
+        history=None,
     ) -> dict:
         session = get_db_session(self.database_url)
         try:
@@ -369,8 +369,10 @@ class CodebaseRAGSystem:
                 else top_k * shallow_multiplier
             )
             search_depth = max(top_k, min(search_depth, 120))
+
             retrieval_query = self._build_retrieval_query(question, normalized_history)
             query_embedding = self.embedder.embed_text(retrieval_query)
+
             semantic_hits = []
             for score, meta in self.vector_store.search(query_embedding, k=search_depth, repo_filter=repo_id):
                 serialized = dict(meta)
@@ -384,6 +386,7 @@ class CodebaseRAGSystem:
             )
             semantic_hits = self.hybrid_search.normalize_semantic_results(semantic_hits)
             fused = self.hybrid_search.reciprocal_rank_fusion(lexical_hits, semantic_hits, top_k=search_depth)
+
             path_hits = self._path_intent_search(
                 self.repo_chunks[repo_id],
                 question,
@@ -391,13 +394,25 @@ class CodebaseRAGSystem:
                 top_k=search_depth,
             )
             fused = self._merge_ranked_candidates(fused, path_hits, top_k=search_depth)
+
             rerank_query = retrieval_query if question_intent in deep_search_intents else question
-            reranked = self.hybrid_search.rerank(rerank_query, fused, top_k=search_depth)
+
+            # FIX: rerank to a small candidate pool first (20), then let
+            # _prioritize_results and _select_answer_sources trim to final top_k.
+            # Previously rerank was called with search_depth (up to 120), meaning
+            # the LLM received far too many chunks and faithfulness dropped.
+            rerank_pool = min(search_depth, 20)
+            reranked = self.hybrid_search.rerank(rerank_query, fused, top_k=rerank_pool)
+
             reranked = self._prioritize_results(question, retrieval_query, reranked, top_k=top_k)
-            reranked = self._select_answer_sources(question, reranked, top_k=top_k)
 
+            # FIX: cap final sources at 5 instead of top_k (8).
+            # 5 sources × 1500 chars = ~7500 chars context, which the LLM handles well.
+            # 8 sources × 2500 chars = ~20000 chars, which causes lost-in-the-middle issues.
+            final_top_k = min(top_k, 5)
+            reranked = self._select_answer_sources(question, reranked, top_k=final_top_k)
-
 
+            answer = self._generate_answer(repo, question, reranked, normalized_history)
             return answer
         finally:
             session.close()
@@ -411,12 +426,13 @@ class CodebaseRAGSystem:
         finally:
             session.close()
 
+
     def _generate_answer(
         self,
-        repo
+        repo,
         question: str,
-        sources:
-        history
+        sources: list,
+        history=None,
     ) -> dict:
         if not sources:
             return {
@@ -428,7 +444,10 @@ class CodebaseRAGSystem:
 
         context_blocks = []
         slim_sources = []
+
         for index, source in enumerate(sources, start=1):
+            content_preview = source["content"][:1500]
+
             context_blocks.append(
                 "\n".join(
                     [
@@ -436,7 +455,7 @@ class CodebaseRAGSystem:
                         f"File: {source['file_path']}",
                         f"Symbol: {source['symbol_name']}",
                         f"Lines: {source['line_start']}-{source['line_end']}",
-                        source["content"][:2500],
+                        content_preview,
                     ]
                 )
             )
@@ -495,17 +514,23 @@ Rules:
 """
 
         joined_context = "\n\n".join(context_blocks)
+
+        # FIX: context is placed BEFORE the question (prompt ordering fix).
         user_prompt = f"""
 Repository: {repo.owner}/{repo.name}
-
+
+Context from the codebase:
+{joined_context}
+
 Recent conversation:
 {self._format_history(history or [])}
 
-
-{
+Now answer this question using only the context above:
+{question}
 """
 
         answer_text, finish_reason = self._generate_markdown_response(system_prompt, user_prompt)
+
         if self._looks_incomplete(answer_text, finish_reason):
             repair_prompt = f"""
 The draft answer below appears to be cut off or incomplete.
@@ -519,7 +544,7 @@ Draft answer:
             f"{user_prompt.strip()}\n\n{repair_prompt.strip()}",
         )
         if self._looks_incomplete(answer_text, finish_reason):
-            short_prompt =
+            short_prompt = """
 Answer the question again, but keep it concise and complete.
 Use 2 short paragraphs or 4-6 bullets max.
 Do not leave the answer unfinished.
@@ -528,6 +553,7 @@ Do not leave the answer unfinished.
             system_prompt,
             f"{user_prompt.strip()}\n\n{short_prompt.strip()}",
         )
+
         answer_text = self._finalize_answer(answer_text)
         confidence = self._estimate_confidence(sources)
         summary = " ".join(answer_text.split())[:160] if answer_text else ""
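Net effect of the two FIX blocks above, restated as a small sketch using only the numbers quoted in the comments:

    # before: rerank saw up to search_depth candidates, and 8 sources x 2500 chars
    #         put roughly 20,000 chars in front of the LLM
    # after:  the cross-encoder sees at most 20 candidates, and at most
    #         5 sources x 1500 chars keep the context near 7,500 chars
    search_depth = 120
    rerank_pool = min(search_depth, 20)   # 20
    final_top_k = min(8, 5)               # 5
    print(rerank_pool, final_top_k, final_top_k * 1500)  # 20 5 7500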
src/vector_store.py
CHANGED
@@ -11,18 +11,18 @@ class QdrantVectorStore:
         self.embedding_dim = embedding_dim
         self.collection_name = os.getenv("QDRANT_COLLECTION", "repo_qa_chunks")
         self.upsert_batch_size = max(1, int(os.getenv("QDRANT_UPSERT_BATCH_SIZE", "64")))
+        self.qdrant_url = self._clean_env("QDRANT_URL")
+        self.qdrant_api_key = self._clean_env("QDRANT_API_KEY")
+        self.timeout = int(os.getenv("QDRANT_TIMEOUT_SECONDS", "120"))
         self.client = self._create_client()
         self._ensure_collection()
 
     def _create_client(self):
-        url = self._clean_env("QDRANT_URL")
-        api_key = self._clean_env("QDRANT_API_KEY")
-        timeout = int(os.getenv("QDRANT_TIMEOUT_SECONDS", "120"))
-        if url:
+        if self.qdrant_url:
             return QdrantClient(
-                url=url,
-                api_key=api_key,
-                timeout=timeout,
+                url=self.qdrant_url,
+                api_key=self.qdrant_api_key,
+                timeout=self.timeout,
                 check_compatibility=False,
             )
         return QdrantClient(":memory:")
@@ -149,6 +149,16 @@ class QdrantVectorStore:
     def load(self):
         self._ensure_collection()
 
+    def is_remote(self) -> bool:
+        return self.qdrant_url is not None
+
+    def keep_alive(self) -> dict:
+        info = self.client.get_collection(self.collection_name)
+        return {
+            "total_vectors": info.points_count or 0,
+            "collection_name": self.collection_name,
+        }
+
     def get_stats(self) -> dict:
         info = self.client.get_collection(self.collection_name)
         return {
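How the keepalive scheduler consumes the two new methods, sketched (the embedding dimension is illustrative):

    from src.vector_store import QdrantVectorStore

    store = QdrantVectorStore(embedding_dim=384)  # illustrative dimension

    if store.is_remote():             # True only when QDRANT_URL is set
        stats = store.keep_alive()    # one cheap get_collection round trip
        print(stats["collection_name"], stats["total_vectors"])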
|