technophyle committed
Commit 35c1d2c · verified · 1 Parent(s): f1089a9

Sync from GitHub via hub-sync

README.md CHANGED
@@ -1,6 +1,5 @@
 ---
 title: Code Compass API
-emoji: 🚀
 colorFrom: blue
 colorTo: indigo
 sdk: docker
@@ -9,13 +8,59 @@ app_port: 7860
 
 # Code Compass Backend
 
-FastAPI backend for a session-oriented GitHub repo QA tool.
-
-Behavior:
-
-- Clones a public GitHub repo
-- Chunks it with tree-sitter
-- Builds retrieval state with a Qdrant adapter
-- Answers questions with Groq-hosted Llama or Amazon Bedrock Claude depending on environment configuration
-- Deletes the cloned repo after indexing
-- Keeps only lightweight repo metadata in SQLite
+FastAPI backend for Code Compass, a personal full-stack RAG project that indexes public GitHub repositories and answers questions with grounded source citations.
+
+## What This Demonstrates
+
+- End-to-end AI application design, not just a prompt wrapper
+- Backend API design with FastAPI, Pydantic validation, and session-scoped state
+- Code-aware retrieval using tree-sitter chunking, vector search, BM25, rank fusion, and reranking
+- Grounded answer generation with file-level citations
+- Deployment-aware tradeoffs for cost, model choice, and free-tier infrastructure
+- Evaluation workflow prepared for retrieval and answer-quality metrics
+
+## Backend Responsibilities
+
+- Clone a public GitHub repository into temporary storage
+- Filter and chunk source files for retrieval
+- Generate embeddings and store chunks in Qdrant
+- Maintain lightweight repository and session metadata in SQLite
+- Run indexing as a background task
+- Retrieve evidence with semantic search, lexical search, fusion, and reranking
+- Generate answers from the selected context and return citations to the UI
+- Delete cloned repository files after indexing
+
+## Runtime Configuration
+
+Local development is configured for higher-quality experimentation:
+
+- `LLM_PROVIDER=bedrock`
+- `EMBEDDING_PROVIDER=bedrock`
+- Claude on Amazon Bedrock for answer generation
+- Cohere Embed on Amazon Bedrock for semantic retrieval
+
+Production is configured for lower-cost hosting:
+
+- `LLM_PROVIDER=groq`
+- `EMBEDDING_PROVIDER=local`
+- Groq-hosted Llama for answer generation
+- Local sentence-transformer embeddings for retrieval
+- Qdrant Cloud for vector storage
+
+## Qdrant Keepalive
+
+The backend starts a lightweight Qdrant keepalive scheduler when `QDRANT_URL` is configured. It pings the configured collection every 12 hours by default so a free-tier Qdrant cluster does not become inactive while the backend process is running.
+
+Configuration:
+
+- `QDRANT_URL`
+- `QDRANT_API_KEY`
+- `QDRANT_COLLECTION=repo_qa_chunks`
+- `QDRANT_KEEPALIVE_ENABLED=true`
+- `QDRANT_KEEPALIVE_INTERVAL_SECONDS=43200`
+
+The main repository also includes a GitHub Actions keepalive workflow for cases where the backend host is asleep.
+
+## Metrics
+
+Metrics will be added after the next benchmark rerun. The evaluation harness is set up to report retrieval hit rate, top-1 hit rate, mean reciprocal rank, source recall, grounded answer rate, checklist pass rate, and optional RAGAS judge metrics.
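For reference, the hit rate and mean reciprocal rank named in the README reduce to a few lines; a minimal sketch, assuming a simple (expected_file, ranked_files) result shape rather than the harness's actual schema:

```python
# Sketch of two metrics named above: hit rate and mean reciprocal rank (MRR).
# The (expected_file, ranked_files) pair format is an assumption for illustration.
from typing import List, Tuple

def hit_rate_and_mrr(results: List[Tuple[str, List[str]]]) -> Tuple[float, float]:
    hits, rr_total = 0, 0.0
    for expected_file, ranked_files in results:
        if expected_file in ranked_files:
            hits += 1
            # Reciprocal rank: 1 / (1-based position of the first relevant file)
            rr_total += 1.0 / (ranked_files.index(expected_file) + 1)
    n = max(len(results), 1)
    return hits / n, rr_total / n

# Expected file retrieved at rank 2 -> hit rate 1.0, MRR 0.5
print(hit_rate_and_mrr([("src/rag_system.py", ["README.md", "src/rag_system.py"])]))
```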
server_app.py CHANGED
@@ -8,6 +8,7 @@ from pydantic import BaseModel, Field, HttpUrl
 from dotenv import load_dotenv
 
 from src.bedrock_claude import BedrockTransientError, is_bedrock_retryable_error
+from src.qdrant_keepalive import QdrantKeepAliveScheduler
 from src.rag_system import CodebaseRAGSystem
 
 load_dotenv(Path(__file__).with_name(".env"))
@@ -34,6 +35,7 @@ app.add_middleware(
 )
 
 rag_system: Optional[CodebaseRAGSystem] = None
+qdrant_keepalive: Optional[QdrantKeepAliveScheduler] = None
 
 
 class RepoIndexRequest(BaseModel):
@@ -60,9 +62,17 @@ def require_session_id(x_session_id: Optional[str] = Header(None, alias="X-Sessi
 
 @app.on_event("startup")
 def startup():
-    global rag_system
+    global qdrant_keepalive, rag_system
     Path("./data").mkdir(exist_ok=True)
     rag_system = CodebaseRAGSystem()
+    qdrant_keepalive = QdrantKeepAliveScheduler(rag_system.vector_store)
+    qdrant_keepalive.start()
+
+
+@app.on_event("shutdown")
+def shutdown():
+    if qdrant_keepalive is not None:
+        qdrant_keepalive.stop()
 
 
 @app.get("/")
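Note that `@app.on_event` is deprecated in recent FastAPI releases in favor of a lifespan handler; a minimal sketch of the equivalent wiring, mirroring the names in this diff, in case the hooks are ever migrated:

```python
# Sketch only: lifespan-based equivalent of the startup/shutdown hooks above.
from contextlib import asynccontextmanager
from pathlib import Path

from fastapi import FastAPI

from src.qdrant_keepalive import QdrantKeepAliveScheduler
from src.rag_system import CodebaseRAGSystem

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup: same steps as startup() in the diff, stored on app.state
    Path("./data").mkdir(exist_ok=True)
    app.state.rag_system = CodebaseRAGSystem()
    app.state.qdrant_keepalive = QdrantKeepAliveScheduler(app.state.rag_system.vector_store)
    app.state.qdrant_keepalive.start()
    yield
    # Shutdown: mirrors shutdown()
    app.state.qdrant_keepalive.stop()

app = FastAPI(lifespan=lifespan)
```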
src/document_processor.py CHANGED
@@ -5,8 +5,9 @@ import pypdf
 
 
 class DocumentProcessor:
-
-    def __init__(self, chunk_size: int = 512, chunk_overlap: int = 50):
+    # FIX: 512 chars was too small — caused mid-thought splits that hurt faithfulness.
+    # 1200 chars with 150 overlap keeps full function signatures + docstrings intact.
+    def __init__(self, chunk_size: int = 1200, chunk_overlap: int = 150):
         self.chunk_size = chunk_size
         self.chunk_overlap = chunk_overlap
 
@@ -19,7 +20,6 @@ class DocumentProcessor:
             text += page.extract_text() + "\n"
         except Exception as e:
             raise ValueError(f"Error reading PDF: {str(e)}")
-
         return text.strip()
 
     def chunk_text(self, text: str) -> List[str]:
@@ -35,24 +35,34 @@ class DocumentProcessor:
             chunk = text[start:end]
 
             if end < text_length:
+                # FIX: prefer paragraph breaks > sentence breaks > newlines.
+                # Previously only checked period + newline, missing paragraph boundaries.
+                last_double_newline = chunk.rfind("\n\n")
                 last_period = chunk.rfind(".")
                 last_newline = chunk.rfind("\n")
-                break_point = max(last_period, last_newline)
 
-                if break_point > self.chunk_size * 0.5:
-                    chunk = chunk[: break_point + 1]
-                    end = start + break_point + 1
+                if last_double_newline > self.chunk_size * 0.4:
+                    break_point = last_double_newline + 1
+                elif last_period > self.chunk_size * 0.5:
+                    break_point = last_period + 1
+                elif last_newline > self.chunk_size * 0.4:
+                    break_point = last_newline + 1
+                else:
+                    break_point = self.chunk_size
+
+                chunk = chunk[:break_point]
+                end = start + break_point
 
-            chunks.append(chunk.strip())
+            stripped = chunk.strip()
+            if stripped:
+                chunks.append(stripped)
 
             start = end - self.chunk_overlap
 
         return [c for c in chunks if c]
 
     def process_document(self, file_path: str) -> Tuple[str, List[str]]:
-
         file_ext = Path(file_path).suffix.lower()
-
         if file_ext == ".pdf":
             text = self.extract_text_from_pdf(file_path)
         elif file_ext == ".txt":
@@ -62,7 +72,6 @@ class DocumentProcessor:
             raise ValueError(f"Unsupported file type: {file_ext}")
 
         chunks = self.chunk_text(text)
-
         return text, chunks
 
     @staticmethod
@@ -71,4 +80,4 @@ class DocumentProcessor:
         with open(file_path, "rb") as f:
             for chunk in iter(lambda: f.read(4096), b""):
                 hash_md5.update(chunk)
-        return hash_md5.hexdigest()
+        return hash_md5.hexdigest()
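A quick usage sketch of the new chunking defaults above; the sample text is illustrative:

```python
# Illustrative only: exercises the chunk_text() change above.
from src.document_processor import DocumentProcessor

processor = DocumentProcessor()  # now defaults to chunk_size=1200, chunk_overlap=150

text = ("A paragraph about indexing behavior.\n\n" * 60).strip()
chunks = processor.chunk_text(text)

for i, chunk in enumerate(chunks[:3]):
    # With the new priority order, a paragraph break past 40% of chunk_size
    # wins over sentence breaks, so chunks should end near "\n\n" boundaries.
    print(i, len(chunk), repr(chunk[-25:]))
```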
src/hybrid_search.py CHANGED
@@ -1,4 +1,3 @@
-import math
 import re
 from collections import defaultdict
 from typing import List
@@ -6,7 +5,6 @@ from typing import List
 from rank_bm25 import BM25Okapi
 from sentence_transformers import CrossEncoder
 
-
 TOKEN_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_./:-]*")
 
 
@@ -27,7 +25,6 @@ class HybridSearchEngine:
     def bm25_search(self, chunks: List[dict], query: str, top_k: int = 12) -> List[dict]:
         if not chunks:
             return []
-
         tokens = tokenize(query)
         if not tokens:
             return []
@@ -73,6 +70,11 @@ class HybridSearchEngine:
         return merged[:top_k]
 
     def rerank(self, query: str, candidates: List[dict], top_k: int = 6) -> List[dict]:
+        """
+        FIX: top_k now defaults to 6 and callers should pass a small final number (4-6),
+        NOT search_depth (which was up to 120). Reranking 120 items then dumping them
+        all into the LLM context was the main faithfulness killer.
+        """
         if not candidates:
             return []
 
@@ -99,4 +101,4 @@ class HybridSearchEngine:
             enriched["semantic_rank"] = rank
             enriched["semantic_score"] = float(item.get("semantic_score", 0.0))
             normalized.append(enriched)
-        return normalized
+        return normalized
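The `reciprocal_rank_fusion` this engine exposes (called from `rag_system.py` below) is standard RRF; a minimal sketch of the technique, with the k=60 smoothing constant and the `chunk_id` key as assumptions rather than values from this diff:

```python
# Standalone RRF sketch; `chunk_id` and k=60 are illustrative assumptions.
from collections import defaultdict
from typing import Dict, List

def reciprocal_rank_fusion(lexical: List[dict], semantic: List[dict],
                           top_k: int = 20, k: int = 60) -> List[dict]:
    scores: Dict[str, float] = defaultdict(float)
    by_id: Dict[str, dict] = {}
    for results in (lexical, semantic):
        for rank, item in enumerate(results, start=1):
            chunk_id = item["chunk_id"]
            scores[chunk_id] += 1.0 / (k + rank)  # standard RRF contribution
            by_id[chunk_id] = item
    ranked = sorted(scores, key=scores.get, reverse=True)
    return [by_id[cid] for cid in ranked[:top_k]]
```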
src/qdrant_keepalive.py ADDED
@@ -0,0 +1,83 @@
+import os
+import threading
+from typing import Optional
+
+from src.vector_store import QdrantVectorStore
+
+
+class QdrantKeepAliveScheduler:
+    def __init__(self, vector_store: QdrantVectorStore):
+        self.vector_store = vector_store
+        self.interval_seconds = self._interval_seconds()
+        self.run_on_start = self._env_flag("QDRANT_KEEPALIVE_RUN_ON_START", True)
+        self.keepalive_enabled = self._env_flag("QDRANT_KEEPALIVE_ENABLED", True)
+        self.enabled = self.keepalive_enabled and self.vector_store.is_remote()
+        self._stop_event = threading.Event()
+        self._thread: Optional[threading.Thread] = None
+
+    def start(self):
+        if not self.enabled:
+            reason = (
+                "disabled by QDRANT_KEEPALIVE_ENABLED"
+                if not self.keepalive_enabled
+                else "set QDRANT_URL to enable remote Qdrant pings"
+            )
+            print(
+                f"[qdrant-keepalive] Disabled; {reason}",
+                flush=True,
+            )
+            return
+        if self._thread and self._thread.is_alive():
+            return
+
+        self._stop_event.clear()
+        self._thread = threading.Thread(
+            target=self._run,
+            name="qdrant-keepalive",
+            daemon=True,
+        )
+        self._thread.start()
+        print(
+            f"[qdrant-keepalive] Started interval_seconds={self.interval_seconds}",
+            flush=True,
+        )
+
+    def stop(self):
+        self._stop_event.set()
+        if self._thread and self._thread.is_alive():
+            self._thread.join(timeout=5)
+        self._thread = None
+
+    def _run(self):
+        if self.run_on_start:
+            self._ping()
+
+        while not self._stop_event.wait(self.interval_seconds):
+            self._ping()
+
+    def _ping(self):
+        try:
+            stats = self.vector_store.keep_alive()
+            print(
+                "[qdrant-keepalive] Ping succeeded "
+                f"collection={stats['collection_name']} "
+                f"points={stats['total_vectors']}",
+                flush=True,
+            )
+        except Exception as exc:
+            print(f"[qdrant-keepalive] Ping failed: {exc}", flush=True)
+
+    @staticmethod
+    def _env_flag(name: str, default: bool) -> bool:
+        value = os.getenv(name)
+        if value is None:
+            return default
+        return value.strip().lower() not in {"0", "false", "no", "off"}
+
+    @staticmethod
+    def _interval_seconds() -> int:
+        value = os.getenv("QDRANT_KEEPALIVE_INTERVAL_SECONDS", "43200")
+        try:
+            return max(60, int(value))
+        except ValueError:
+            return 43200
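The scheduler's loop leans on `threading.Event.wait` as an interruptible sleep; a trimmed sketch of just that pattern:

```python
# Minimal illustration of the Event.wait loop used by QdrantKeepAliveScheduler.
import threading

class PeriodicTask:
    def __init__(self, interval_seconds: float):
        self.interval_seconds = interval_seconds
        self._stop_event = threading.Event()

    def run(self):
        # wait() returns False on timeout (do another tick) and True as soon
        # as stop() sets the event, so shutdown never blocks a full interval.
        while not self._stop_event.wait(self.interval_seconds):
            print("tick")

    def stop(self):
        self._stop_event.set()
```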
src/rag_system.py CHANGED
@@ -333,7 +333,7 @@ class CodebaseRAGSystem:
         session_key: str,
         question: str,
         top_k: int = 8,
-        history: Optional[List[object]] = None,
+        history=None,
     ) -> dict:
         session = get_db_session(self.database_url)
         try:
@@ -369,8 +369,10 @@ class CodebaseRAGSystem:
                 else top_k * shallow_multiplier
             )
             search_depth = max(top_k, min(search_depth, 120))
+
             retrieval_query = self._build_retrieval_query(question, normalized_history)
             query_embedding = self.embedder.embed_text(retrieval_query)
+
             semantic_hits = []
             for score, meta in self.vector_store.search(query_embedding, k=search_depth, repo_filter=repo_id):
                 serialized = dict(meta)
@@ -384,6 +386,7 @@ class CodebaseRAGSystem:
             )
             semantic_hits = self.hybrid_search.normalize_semantic_results(semantic_hits)
             fused = self.hybrid_search.reciprocal_rank_fusion(lexical_hits, semantic_hits, top_k=search_depth)
+
             path_hits = self._path_intent_search(
                 self.repo_chunks[repo_id],
                 question,
@@ -391,13 +394,25 @@ class CodebaseRAGSystem:
                 top_k=search_depth,
             )
             fused = self._merge_ranked_candidates(fused, path_hits, top_k=search_depth)
+
             rerank_query = retrieval_query if question_intent in deep_search_intents else question
-            reranked = self.hybrid_search.rerank(rerank_query, fused, top_k=search_depth)
+
+            # FIX: rerank to a small candidate pool first (20), then let
+            # _prioritize_results and _select_answer_sources trim to final top_k.
+            # Previously rerank was called with search_depth (up to 120), meaning
+            # the LLM received far too many chunks and faithfulness dropped.
+            rerank_pool = min(search_depth, 20)
+            reranked = self.hybrid_search.rerank(rerank_query, fused, top_k=rerank_pool)
+
             reranked = self._prioritize_results(question, retrieval_query, reranked, top_k=top_k)
-            reranked = self._select_answer_sources(question, reranked, top_k=top_k)
 
-            answer = self._generate_answer(repo, question, reranked, normalized_history)
+            # FIX: cap final sources at 5 instead of top_k (8).
+            # 5 sources × 1500 chars = ~7500 chars context, which the LLM handles well.
+            # 8 sources × 2500 chars = ~20000 chars, which causes lost-in-the-middle issues.
+            final_top_k = min(top_k, 5)
+            reranked = self._select_answer_sources(question, reranked, top_k=final_top_k)
 
+            answer = self._generate_answer(repo, question, reranked, normalized_history)
             return answer
         finally:
             session.close()
@@ -411,12 +426,13 @@ class CodebaseRAGSystem:
         finally:
             session.close()
 
+
     def _generate_answer(
         self,
-        repo: Repository,
+        repo,
         question: str,
-        sources: List[dict],
-        history: Optional[List[dict]] = None,
+        sources: list,
+        history=None,
     ) -> dict:
         if not sources:
             return {
@@ -428,7 +444,10 @@ class CodebaseRAGSystem:
 
         context_blocks = []
         slim_sources = []
+
         for index, source in enumerate(sources, start=1):
+            content_preview = source["content"][:1500]
+
             context_blocks.append(
                 "\n".join(
                     [
@@ -436,7 +455,7 @@ class CodebaseRAGSystem:
                         f"File: {source['file_path']}",
                         f"Symbol: {source['symbol_name']}",
                         f"Lines: {source['line_start']}-{source['line_end']}",
-                        source["content"][:2500],
+                        content_preview,
                     ]
                 )
             )
@@ -495,17 +514,23 @@ Rules:
 """
 
         joined_context = "\n\n".join(context_blocks)
+
+        # FIX: context is placed BEFORE the question (prompt ordering fix).
         user_prompt = f"""
 Repository: {repo.owner}/{repo.name}
-Question: {question}
+
+Context from the codebase:
+{joined_context}
+
 Recent conversation:
 {self._format_history(history or [])}
 
-Context:
-{joined_context}
+Now answer this question using only the context above:
+{question}
 """
 
         answer_text, finish_reason = self._generate_markdown_response(system_prompt, user_prompt)
+
         if self._looks_incomplete(answer_text, finish_reason):
             repair_prompt = f"""
 The draft answer below appears to be cut off or incomplete.
@@ -519,7 +544,7 @@ Draft answer:
             f"{user_prompt.strip()}\n\n{repair_prompt.strip()}",
         )
         if self._looks_incomplete(answer_text, finish_reason):
-            short_prompt = f"""
+            short_prompt = """
 Answer the question again, but keep it concise and complete.
 Use 2 short paragraphs or 4-6 bullets max.
 Do not leave the answer unfinished.
@@ -528,6 +553,7 @@ Do not leave the answer unfinished.
             system_prompt,
             f"{user_prompt.strip()}\n\n{short_prompt.strip()}",
         )
+
         answer_text = self._finalize_answer(answer_text)
         confidence = self._estimate_confidence(sources)
         summary = " ".join(answer_text.split())[:160] if answer_text else ""
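The context-budget arithmetic behind the two FIX comments above, with the chars-per-token conversion as a rough assumption:

```python
# Numbers from the diff; the chars-per-token ratio is an approximation.
old_context_chars = 8 * 2500   # previous: top_k=8 sources x 2500-char previews
new_context_chars = 5 * 1500   # now: final_top_k=5 sources x 1500-char previews

CHARS_PER_TOKEN = 4  # rough heuristic for English/code text
print(old_context_chars, old_context_chars // CHARS_PER_TOKEN)  # 20000 chars, ~5000 tokens
print(new_context_chars, new_context_chars // CHARS_PER_TOKEN)  # 7500 chars, ~1875 tokens
```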
src/vector_store.py CHANGED
@@ -11,18 +11,18 @@ class QdrantVectorStore:
         self.embedding_dim = embedding_dim
         self.collection_name = os.getenv("QDRANT_COLLECTION", "repo_qa_chunks")
         self.upsert_batch_size = max(1, int(os.getenv("QDRANT_UPSERT_BATCH_SIZE", "64")))
+        self.qdrant_url = self._clean_env("QDRANT_URL")
+        self.qdrant_api_key = self._clean_env("QDRANT_API_KEY")
+        self.timeout = int(os.getenv("QDRANT_TIMEOUT_SECONDS", "120"))
         self.client = self._create_client()
         self._ensure_collection()
 
     def _create_client(self):
-        url = self._clean_env("QDRANT_URL")
-        api_key = self._clean_env("QDRANT_API_KEY")
-        timeout = int(os.getenv("QDRANT_TIMEOUT_SECONDS", "120"))
-        if url:
+        if self.qdrant_url:
             return QdrantClient(
-                url=url,
-                api_key=api_key,
-                timeout=timeout,
+                url=self.qdrant_url,
+                api_key=self.qdrant_api_key,
+                timeout=self.timeout,
                 check_compatibility=False,
             )
         return QdrantClient(":memory:")
@@ -149,6 +149,16 @@ class QdrantVectorStore:
     def load(self):
         self._ensure_collection()
 
+    def is_remote(self) -> bool:
+        return self.qdrant_url is not None
+
+    def keep_alive(self) -> dict:
+        info = self.client.get_collection(self.collection_name)
+        return {
+            "total_vectors": info.points_count or 0,
+            "collection_name": self.collection_name,
+        }
+
     def get_stats(self) -> dict:
         info = self.client.get_collection(self.collection_name)
         return {
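A small usage sketch of the new helpers; this assumes `QdrantVectorStore` accepts `embedding_dim` as a keyword argument, as the hunk above suggests:

```python
# Illustrative only: in-memory fallback behavior of the refactored store.
import os
from src.vector_store import QdrantVectorStore

os.environ.pop("QDRANT_URL", None)            # simulate local/dev with no cluster
store = QdrantVectorStore(embedding_dim=384)  # falls back to QdrantClient(":memory:")
print(store.is_remote())                      # False -> QdrantKeepAliveScheduler stays disabled
```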