# backend/tests/test_enumerate_query.py
# Unit tests for the enumeration query classifier (Fix 1) and
# the portfolio-relevance helper (Fix 2 Rule 1).
#
# All tests are pure-Python; no network calls, no Qdrant, no embedder.
import pytest
from unittest.mock import AsyncMock, MagicMock, patch

from app.pipeline.nodes.enumerate_query import (
    _has_enumeration_intent,
    _extract_source_types,
    make_enumerate_query_node,
)
from app.core.portfolio_context import is_portfolio_relevant

# Patch target for LangGraph's stream writer, which requires a runnable context
# that doesn't exist in unit tests.
_WRITER_PATCH = "app.pipeline.nodes.enumerate_query.get_stream_writer"


# ---------------------------------------------------------------------------
# Shared helpers for the make_enumerate_query_node tests
# ---------------------------------------------------------------------------

def _project_chunk(title: str, text: str, doc_id: str) -> dict:
    """Build a chunk dict of source_type "project" in the shape the tests feed the node."""
    return {
        "text": text,
        "metadata": {
            "source_title": title,
            "source_type": "project",
            "doc_id": doc_id,
        },
    }


def _store_returning(chunks: list) -> MagicMock:
    """Return a mock vector store whose ``scroll_by_source_type`` yields *chunks*."""
    store = MagicMock()
    store.scroll_by_source_type = MagicMock(return_value=chunks)
    return store


def _run_node(vector_store: MagicMock, query: str):
    """Build the node around *vector_store* and run it once for *query*.

    The stream writer is patched out for the duration of the call because it
    cannot be obtained outside a runnable context.
    """
    # NOTE(review): the node is called synchronously, exactly as the original
    # tests did. If make_enumerate_query_node returns a coroutine function,
    # this result would need to be awaited — confirm against the node module.
    node = make_enumerate_query_node(vector_store)
    state = {"query": query, "retrieval_attempts": 0}
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        return node(state)


# ---------------------------------------------------------------------------
# _has_enumeration_intent
# ---------------------------------------------------------------------------

class TestHasEnumerationIntent:
    """Positive and negative phrasings for ``_has_enumeration_intent``."""

    def test_list_all_projects(self):
        assert _has_enumeration_intent("list all projects") is True

    def test_list_projects_no_all(self):
        assert _has_enumeration_intent("list projects") is True

    def test_show_all_blogs(self):
        assert _has_enumeration_intent("show all blog posts") is True

    def test_how_many_blogs(self):
        assert _has_enumeration_intent("how many blog posts do you have") is True

    def test_count_projects(self):
        assert _has_enumeration_intent("count projects") is True

    def test_enumerate_skills(self):
        assert _has_enumeration_intent("enumerate all skills") is True

    def test_give_me_a_list_of(self):
        assert _has_enumeration_intent("give me a list of your projects") is True

    def test_what_are_all_the_projects(self):
        # trailing-regex pattern: "what are all the X"
        assert _has_enumeration_intent("what are all the projects") is True

    def test_which_are_all_the_blogs(self):
        # Requires "all" keyword — the trailing regex gate prevents over-triggering.
        assert _has_enumeration_intent("which are all the blog posts") is True

    def test_regular_how_query_no_intent(self):
        assert _has_enumeration_intent("how does TextOps work") is False

    def test_explain_query_no_intent(self):
        assert _has_enumeration_intent("explain the architecture of PersonaBot") is False

    def test_what_is_query_no_intent(self):
        assert _has_enumeration_intent("what is echo-echo") is False

    def test_tell_me_about_no_intent(self):
        assert _has_enumeration_intent("tell me about your background") is False

    def test_empty_string(self):
        assert _has_enumeration_intent("") is False


# ---------------------------------------------------------------------------
# _extract_source_types
# ---------------------------------------------------------------------------

class TestExtractSourceTypes:
    """Query wording → source-type tokens returned by ``_extract_source_types``."""

    def test_projects(self):
        found = _extract_source_types("list all projects")
        assert "project" in found

    def test_blogs(self):
        found = _extract_source_types("show all blog posts")
        assert "blog" in found

    def test_skills_cv(self):
        found = _extract_source_types("list all your skills")
        assert "cv" in found

    def test_generic_returns_empty(self):
        # "everything" or "all" without a type token → [] meaning scroll all types
        found = _extract_source_types("list everything")
        assert found == []

    def test_github_repos(self):
        found = _extract_source_types("show all github repos")
        assert "github" in found

    def test_work_experience(self):
        found = _extract_source_types("list all work experience")
        assert "cv" in found


# ---------------------------------------------------------------------------
# make_enumerate_query_node
# ---------------------------------------------------------------------------

@pytest.mark.asyncio
async def test_non_enumeration_query_passes_through():
    """A regular query must exit the node with is_enumeration_query=False."""
    store = _store_returning([])

    outcome = _run_node(store, "how does TextOps work")

    assert outcome["is_enumeration_query"] is False
    # Vector store must NOT be called for normal queries (zero cost guarantee).
    store.scroll_by_source_type.assert_not_called()


@pytest.mark.asyncio
async def test_enumeration_query_sets_flag_and_populates_chunks():
    """An enumeration query must call scroll and set is_enumeration_query=True."""
    store = _store_returning(
        [
            _project_chunk("TextOps", "TextOps is a CLI toolkit.", "textops-1"),
            _project_chunk("Echo-Echo", "Echo-Echo is a WebRTC demo.", "echo-1"),
        ]
    )

    outcome = _run_node(store, "list all projects")

    assert outcome["is_enumeration_query"] is True
    assert len(outcome["reranked_chunks"]) == 2
    store.scroll_by_source_type.assert_called_once()


@pytest.mark.asyncio
async def test_enumeration_deduplicates_by_source_title():
    """Duplicate source_title chunks must be collapsed to one representative."""
    # Two chunks with the same source_title but different doc_ids.
    store = _store_returning(
        [
            _project_chunk("TextOps", "TextOps chunk 1", "textops-1"),
            _project_chunk("TextOps", "TextOps chunk 2", "textops-2"),
        ]
    )

    outcome = _run_node(store, "list all projects")

    assert outcome["is_enumeration_query"] is True
    assert len(outcome["reranked_chunks"]) == 1


@pytest.mark.asyncio
async def test_enumeration_empty_scroll_returns_not_found():
    """When Qdrant returns no chunks, is_enumeration_query stays False (no results to list)."""
    store = _store_returning([])

    outcome = _run_node(store, "list all projects")

    # With no chunks, the node does not commit to enumeration path; falls to RAG.
    assert outcome["is_enumeration_query"] is False


# ---------------------------------------------------------------------------
# is_portfolio_relevant (Fix 2 Rule 1)
# ---------------------------------------------------------------------------

class TestIsPortfolioRelevant:
    """Queries that ``is_portfolio_relevant`` must accept or reject."""

    def test_known_project_name(self):
        assert is_portfolio_relevant("how does textops work") is True

    def test_known_project_variant(self):
        assert is_portfolio_relevant("tell me about echo echo") is True

    def test_known_technology(self):
        assert is_portfolio_relevant("explain the use of langchain in your stack") is True

    def test_known_organisation(self):
        assert is_portfolio_relevant("what did you do at vk live") is True

    def test_unrelated_query(self):
        assert is_portfolio_relevant("what is the weather in london") is False

    def test_generic_question(self):
        assert is_portfolio_relevant("tell me a joke") is False

    def test_empty_string(self):
        assert is_portfolio_relevant("") is False

    def test_resume_intent_keywords_are_relevant(self):
        assert is_portfolio_relevant("tell me about his work experience") is True

    def test_stt_typo_work_experience_is_still_relevant(self):
        # "walk experience" stands in for a mis-transcribed "work experience".
        assert is_portfolio_relevant("tell me about his walk experience") is True

    def test_tech_stack_intent_is_relevant(self):
        assert is_portfolio_relevant("Could you tell me about his tech stack?") is True

    def test_professional_setting_work_experience_is_relevant(self):
        assert is_portfolio_relevant("What work experience do you have in a professional setting") is True

    def test_tech_stack_use_phrase_is_relevant(self):
        assert is_portfolio_relevant("What tech stack does he use") is True