# personabot-api/tests/test_enumerate_query.py
# GitHub Actions
# Deploy 8e14626
# 8da917e
# backend/tests/test_enumerate_query.py
# Unit tests for the enumeration query classifier (Fix 1) and
# the portfolio-relevance helper (Fix 2 Rule 1).
#
# All tests are pure-Python; no network calls, no Qdrant, no embedder.
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from app.pipeline.nodes.enumerate_query import (
_has_enumeration_intent,
_extract_source_types,
make_enumerate_query_node,
)
from app.core.portfolio_context import is_portfolio_relevant
# Patch target for LangGraph's stream writer, which requires a runnable context
# that doesn't exist in unit tests.
# This is the dotted path handed to ``unittest.mock.patch`` by every node test
# in this file, so that ``get_stream_writer`` — as looked up inside the
# ``enumerate_query`` module — is replaced by a mock returning a ``MagicMock``.
_WRITER_PATCH = "app.pipeline.nodes.enumerate_query.get_stream_writer"
# ---------------------------------------------------------------------------
# _has_enumeration_intent
# ---------------------------------------------------------------------------
class TestHasEnumerationIntent:
    """Pin which query phrasings ``_has_enumeration_intent`` reports as True or False."""

    # -- phrasings expected to be flagged as enumeration requests ----------

    def test_list_all_projects(self):
        """'list all ...' is flagged."""
        flagged = _has_enumeration_intent("list all projects")
        assert flagged is True

    def test_list_projects_no_all(self):
        """'list ...' without the word 'all' is still flagged."""
        flagged = _has_enumeration_intent("list projects")
        assert flagged is True

    def test_show_all_blogs(self):
        """'show all ...' is flagged."""
        flagged = _has_enumeration_intent("show all blog posts")
        assert flagged is True

    def test_how_many_blogs(self):
        """'how many ...' is flagged."""
        flagged = _has_enumeration_intent("how many blog posts do you have")
        assert flagged is True

    def test_count_projects(self):
        """'count ...' is flagged."""
        flagged = _has_enumeration_intent("count projects")
        assert flagged is True

    def test_enumerate_skills(self):
        """'enumerate all ...' is flagged."""
        flagged = _has_enumeration_intent("enumerate all skills")
        assert flagged is True

    def test_give_me_a_list_of(self):
        """'give me a list of ...' is flagged."""
        flagged = _has_enumeration_intent("give me a list of your projects")
        assert flagged is True

    def test_what_are_all_the_projects(self):
        """'what are all the X' is flagged."""
        # trailing-regex pattern: "what are all the X"
        flagged = _has_enumeration_intent("what are all the projects")
        assert flagged is True

    def test_which_are_all_the_blogs(self):
        """'which are all the X' is flagged."""
        # Requires "all" keyword — the trailing regex gate prevents over-triggering.
        flagged = _has_enumeration_intent("which are all the blog posts")
        assert flagged is True

    # -- phrasings expected NOT to be flagged ------------------------------

    def test_regular_how_query_no_intent(self):
        """A 'how does X work' question is not flagged."""
        flagged = _has_enumeration_intent("how does TextOps work")
        assert flagged is False

    def test_explain_query_no_intent(self):
        """An 'explain ...' request is not flagged."""
        flagged = _has_enumeration_intent("explain the architecture of PersonaBot")
        assert flagged is False

    def test_what_is_query_no_intent(self):
        """A 'what is X' question is not flagged."""
        flagged = _has_enumeration_intent("what is echo-echo")
        assert flagged is False

    def test_tell_me_about_no_intent(self):
        """A 'tell me about ...' request is not flagged."""
        flagged = _has_enumeration_intent("tell me about your background")
        assert flagged is False

    def test_empty_string(self):
        """The empty query is not flagged."""
        flagged = _has_enumeration_intent("")
        assert flagged is False
# ---------------------------------------------------------------------------
# _extract_source_types
# ---------------------------------------------------------------------------
class TestExtractSourceTypes:
    """Pin the source-type tokens ``_extract_source_types`` derives from a query."""

    def test_projects(self):
        """A query mentioning projects yields the "project" type."""
        assert "project" in _extract_source_types("list all projects")

    def test_blogs(self):
        """A query mentioning blog posts yields the "blog" type."""
        assert "blog" in _extract_source_types("show all blog posts")

    def test_skills_cv(self):
        """A query mentioning skills yields the "cv" type."""
        assert "cv" in _extract_source_types("list all your skills")

    def test_generic_returns_empty(self):
        """A query with no type token yields an empty list."""
        # "everything" or "all" without a type token → [] meaning scroll all types
        extracted = _extract_source_types("list everything")
        assert extracted == []

    def test_github_repos(self):
        """A query mentioning github repos yields the "github" type."""
        assert "github" in _extract_source_types("show all github repos")

    def test_work_experience(self):
        """A query mentioning work experience yields the "cv" type."""
        assert "cv" in _extract_source_types("list all work experience")
# ---------------------------------------------------------------------------
# make_enumerate_query_node
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_non_enumeration_query_passes_through():
    """A regular query must exit the node with is_enumeration_query=False."""
    # Stand-in vector store whose scroll call would return nothing if invoked.
    store = MagicMock()
    store.scroll_by_source_type = MagicMock(return_value=[])

    enumerate_node = make_enumerate_query_node(store)
    graph_state = {"query": "how does TextOps work", "retrieval_attempts": 0}

    # The node is invoked as a plain call (no await) with the stream writer patched out.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        result = enumerate_node(graph_state)

    assert result["is_enumeration_query"] is False
    # Vector store must NOT be called for normal queries (zero cost guarantee).
    store.scroll_by_source_type.assert_not_called()
@pytest.mark.asyncio
async def test_enumeration_query_sets_flag_and_populates_chunks():
    """An enumeration query must call scroll and set is_enumeration_query=True."""

    def project_chunk(text, title, doc_id):
        # Build one scroll result with the metadata keys used by this test.
        return {
            "text": text,
            "metadata": {"source_title": title, "source_type": "project", "doc_id": doc_id},
        }

    # Two chunks with distinct source titles.
    scrolled = [
        project_chunk("TextOps is a CLI toolkit.", "TextOps", "textops-1"),
        project_chunk("Echo-Echo is a WebRTC demo.", "Echo-Echo", "echo-1"),
    ]

    store = MagicMock()
    store.scroll_by_source_type = MagicMock(return_value=scrolled)

    enumerate_node = make_enumerate_query_node(store)
    graph_state = {"query": "list all projects", "retrieval_attempts": 0}

    # The node is invoked as a plain call (no await) with the stream writer patched out.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        result = enumerate_node(graph_state)

    assert result["is_enumeration_query"] is True
    assert len(result["reranked_chunks"]) == 2
    store.scroll_by_source_type.assert_called_once()
@pytest.mark.asyncio
async def test_enumeration_deduplicates_by_source_title():
    """Duplicate source_title chunks must be collapsed to one representative."""
    # Two chunks sharing source_title "TextOps" but carrying different doc_ids.
    scrolled = [
        {
            "text": f"TextOps chunk {index}",
            "metadata": {
                "source_title": "TextOps",
                "source_type": "project",
                "doc_id": f"textops-{index}",
            },
        }
        for index in (1, 2)
    ]

    store = MagicMock()
    store.scroll_by_source_type = MagicMock(return_value=scrolled)

    enumerate_node = make_enumerate_query_node(store)
    graph_state = {"query": "list all projects", "retrieval_attempts": 0}

    # The node is invoked as a plain call (no await) with the stream writer patched out.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        result = enumerate_node(graph_state)

    assert result["is_enumeration_query"] is True
    assert len(result["reranked_chunks"]) == 1
@pytest.mark.asyncio
async def test_enumeration_empty_scroll_returns_not_found():
    """When Qdrant returns no chunks, is_enumeration_query stays False (no results to list)."""
    # The query is phrased as an enumeration, but the store has nothing to scroll.
    store = MagicMock()
    store.scroll_by_source_type = MagicMock(return_value=[])

    enumerate_node = make_enumerate_query_node(store)
    graph_state = {"query": "list all projects", "retrieval_attempts": 0}

    # The node is invoked as a plain call (no await) with the stream writer patched out.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        result = enumerate_node(graph_state)

    # With no chunks, the node does not commit to enumeration path; falls to RAG.
    assert result["is_enumeration_query"] is False
# ---------------------------------------------------------------------------
# is_portfolio_relevant (Fix 2 Rule 1)
# ---------------------------------------------------------------------------
class TestIsPortfolioRelevant:
    """Pin which queries ``is_portfolio_relevant`` accepts (True) or rejects (False)."""

    # -- queries expected to be relevant -----------------------------------

    def test_known_project_name(self):
        """A query containing 'textops' is relevant."""
        relevant = is_portfolio_relevant("how does textops work")
        assert relevant is True

    def test_known_project_variant(self):
        """The spaced spelling 'echo echo' is relevant."""
        relevant = is_portfolio_relevant("tell me about echo echo")
        assert relevant is True

    def test_known_technology(self):
        """A query mentioning 'langchain' is relevant."""
        relevant = is_portfolio_relevant("explain the use of langchain in your stack")
        assert relevant is True

    def test_known_organisation(self):
        """A query mentioning 'vk live' is relevant."""
        relevant = is_portfolio_relevant("what did you do at vk live")
        assert relevant is True

    # -- queries expected to be rejected -----------------------------------

    def test_unrelated_query(self):
        """A weather question is not relevant."""
        relevant = is_portfolio_relevant("what is the weather in london")
        assert relevant is False

    def test_generic_question(self):
        """A request for a joke is not relevant."""
        relevant = is_portfolio_relevant("tell me a joke")
        assert relevant is False

    def test_empty_string(self):
        """The empty query is not relevant."""
        relevant = is_portfolio_relevant("")
        assert relevant is False

    # -- résumé / tech-stack phrasings expected to be relevant -------------

    def test_resume_intent_keywords_are_relevant(self):
        """'work experience' phrasing is relevant."""
        relevant = is_portfolio_relevant("tell me about his work experience")
        assert relevant is True

    def test_stt_typo_work_experience_is_still_relevant(self):
        """'walk experience' (a typo of 'work experience', per the test name) is relevant."""
        relevant = is_portfolio_relevant("tell me about his walk experience")
        assert relevant is True

    def test_tech_stack_intent_is_relevant(self):
        """A capitalised, punctuated 'tech stack' question is relevant."""
        relevant = is_portfolio_relevant("Could you tell me about his tech stack?")
        assert relevant is True

    def test_professional_setting_work_experience_is_relevant(self):
        """'work experience ... in a professional setting' is relevant."""
        relevant = is_portfolio_relevant(
            "What work experience do you have in a professional setting"
        )
        assert relevant is True

    def test_tech_stack_use_phrase_is_relevant(self):
        """'What tech stack does he use' is relevant."""
        relevant = is_portfolio_relevant("What tech stack does he use")
        assert relevant is True