Spaces:
Running
Running
# backend/tests/test_enumerate_query.py
# Unit tests for the enumeration query classifier (Fix 1) and
# the portfolio-relevance helper (Fix 2 Rule 1).
#
# All tests are pure-Python; no network calls, no Qdrant, no embedder.
| import pytest | |
| from unittest.mock import AsyncMock, MagicMock, patch | |
| from app.pipeline.nodes.enumerate_query import ( | |
| _has_enumeration_intent, | |
| _extract_source_types, | |
| make_enumerate_query_node, | |
| ) | |
| from app.core.portfolio_context import is_portfolio_relevant | |
| # Patch target for LangGraph's stream writer, which requires a runnable context | |
| # that doesn't exist in unit tests. | |
| _WRITER_PATCH = "app.pipeline.nodes.enumerate_query.get_stream_writer" | |
# ---------------------------------------------------------------------------
# _has_enumeration_intent
# ---------------------------------------------------------------------------
class TestHasEnumerationIntent:
    """Pin the boolean verdict of `_has_enumeration_intent` per phrasing.

    Listing / counting phrasings must yield exactly ``True``; explanatory
    questions and the empty string must yield exactly ``False``.
    """

    # -- phrasings expected to be classified as enumeration requests --------

    def test_list_all_projects(self):
        query = "list all projects"
        assert _has_enumeration_intent(query) is True

    def test_list_projects_no_all(self):
        query = "list projects"
        assert _has_enumeration_intent(query) is True

    def test_show_all_blogs(self):
        query = "show all blog posts"
        assert _has_enumeration_intent(query) is True

    def test_how_many_blogs(self):
        query = "how many blog posts do you have"
        assert _has_enumeration_intent(query) is True

    def test_count_projects(self):
        query = "count projects"
        assert _has_enumeration_intent(query) is True

    def test_enumerate_skills(self):
        query = "enumerate all skills"
        assert _has_enumeration_intent(query) is True

    def test_give_me_a_list_of(self):
        query = "give me a list of your projects"
        assert _has_enumeration_intent(query) is True

    def test_what_are_all_the_projects(self):
        # trailing-regex pattern: "what are all the X"
        query = "what are all the projects"
        assert _has_enumeration_intent(query) is True

    def test_which_are_all_the_blogs(self):
        # Requires "all" keyword — the trailing regex gate prevents over-triggering.
        query = "which are all the blog posts"
        assert _has_enumeration_intent(query) is True

    # -- phrasings expected NOT to be classified as enumeration requests ----

    def test_regular_how_query_no_intent(self):
        query = "how does TextOps work"
        assert _has_enumeration_intent(query) is False

    def test_explain_query_no_intent(self):
        query = "explain the architecture of PersonaBot"
        assert _has_enumeration_intent(query) is False

    def test_what_is_query_no_intent(self):
        query = "what is echo-echo"
        assert _has_enumeration_intent(query) is False

    def test_tell_me_about_no_intent(self):
        query = "tell me about your background"
        assert _has_enumeration_intent(query) is False

    def test_empty_string(self):
        query = ""
        assert _has_enumeration_intent(query) is False
# ---------------------------------------------------------------------------
# _extract_source_types
# ---------------------------------------------------------------------------
class TestExtractSourceTypes:
    """Check which source-type labels `_extract_source_types` reports.

    Each test feeds one query and asserts that the expected label is among
    the returned values (or that nothing is returned for a generic query).
    """

    def test_projects(self):
        detected = _extract_source_types("list all projects")
        assert "project" in detected

    def test_blogs(self):
        detected = _extract_source_types("show all blog posts")
        assert "blog" in detected

    def test_skills_cv(self):
        detected = _extract_source_types("list all your skills")
        assert "cv" in detected

    def test_generic_returns_empty(self):
        # "everything" or "all" without a type token → [] meaning scroll all types
        detected = _extract_source_types("list everything")
        assert detected == []

    def test_github_repos(self):
        detected = _extract_source_types("show all github repos")
        assert "github" in detected

    def test_work_experience(self):
        detected = _extract_source_types("list all work experience")
        assert "cv" in detected
# ---------------------------------------------------------------------------
# make_enumerate_query_node
# ---------------------------------------------------------------------------
async def test_non_enumeration_query_passes_through():
    """A non-listing query leaves the node with is_enumeration_query=False.

    The mocked vector store's scroll method must not be touched at all.
    """
    # NOTE(review): declared async although nothing is awaited; presumably
    # relies on an async-capable pytest plugin being configured — confirm.
    vector_store = MagicMock()
    vector_store.scroll_by_source_type.return_value = []
    enumerate_node = make_enumerate_query_node(vector_store)
    pipeline_state = {"query": "how does TextOps work", "retrieval_attempts": 0}

    # Swap out the stream writer lookup for the duration of the node call.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        output = enumerate_node(pipeline_state)

    assert output["is_enumeration_query"] is False
    # Vector store must NOT be called for normal queries (zero cost guarantee).
    vector_store.scroll_by_source_type.assert_not_called()
async def test_enumeration_query_sets_flag_and_populates_chunks():
    """A listing query scrolls the store once and reports both chunks.

    Two chunks with distinct source titles are returned by the mocked scroll;
    the node output must flag enumeration and carry two reranked chunks.
    """
    # NOTE(review): declared async although nothing is awaited; presumably
    # relies on an async-capable pytest plugin being configured — confirm.
    project_chunks = [
        {
            "text": "TextOps is a CLI toolkit.",
            "metadata": {"source_title": "TextOps", "source_type": "project", "doc_id": "textops-1"},
        },
        {
            "text": "Echo-Echo is a WebRTC demo.",
            "metadata": {"source_title": "Echo-Echo", "source_type": "project", "doc_id": "echo-1"},
        },
    ]
    vector_store = MagicMock()
    vector_store.scroll_by_source_type.return_value = project_chunks
    enumerate_node = make_enumerate_query_node(vector_store)
    pipeline_state = {"query": "list all projects", "retrieval_attempts": 0}

    # Swap out the stream writer lookup for the duration of the node call.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        output = enumerate_node(pipeline_state)

    assert output["is_enumeration_query"] is True
    assert len(output["reranked_chunks"]) == 2
    vector_store.scroll_by_source_type.assert_called_once()
async def test_enumeration_deduplicates_by_source_title():
    """Chunks sharing a source_title collapse to a single reranked chunk.

    Both mocked chunks carry the title "TextOps" (with different doc_ids);
    the node output must contain exactly one reranked chunk.
    """
    # NOTE(review): declared async although nothing is awaited; presumably
    # relies on an async-capable pytest plugin being configured — confirm.
    same_title_chunks = [
        {
            "text": "TextOps chunk 1",
            "metadata": {"source_title": "TextOps", "source_type": "project", "doc_id": "textops-1"},
        },
        {
            "text": "TextOps chunk 2",
            "metadata": {"source_title": "TextOps", "source_type": "project", "doc_id": "textops-2"},
        },
    ]
    vector_store = MagicMock()
    vector_store.scroll_by_source_type.return_value = same_title_chunks
    enumerate_node = make_enumerate_query_node(vector_store)
    pipeline_state = {"query": "list all projects", "retrieval_attempts": 0}

    # Swap out the stream writer lookup for the duration of the node call.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        output = enumerate_node(pipeline_state)

    assert output["is_enumeration_query"] is True
    assert len(output["reranked_chunks"]) == 1
async def test_enumeration_empty_scroll_returns_not_found():
    """An empty scroll result leaves is_enumeration_query False.

    The query itself is a listing request, but the mocked store returns no
    chunks, so there is nothing to list.
    """
    # NOTE(review): declared async although nothing is awaited; presumably
    # relies on an async-capable pytest plugin being configured — confirm.
    vector_store = MagicMock()
    vector_store.scroll_by_source_type.return_value = []
    enumerate_node = make_enumerate_query_node(vector_store)
    pipeline_state = {"query": "list all projects", "retrieval_attempts": 0}

    # Swap out the stream writer lookup for the duration of the node call.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        output = enumerate_node(pipeline_state)

    # With no chunks, the node does not commit to enumeration path; falls to RAG.
    assert output["is_enumeration_query"] is False
# ---------------------------------------------------------------------------
# is_portfolio_relevant (Fix 2 Rule 1)
# ---------------------------------------------------------------------------
class TestIsPortfolioRelevant:
    """Pin the boolean verdict of `is_portfolio_relevant` per query.

    Queries naming a project, technology, organisation, or resume-style topic
    must yield exactly ``True``; off-topic or empty queries exactly ``False``.
    """

    # -- queries expected to be judged relevant ------------------------------

    def test_known_project_name(self):
        query = "how does textops work"
        assert is_portfolio_relevant(query) is True

    def test_known_project_variant(self):
        query = "tell me about echo echo"
        assert is_portfolio_relevant(query) is True

    def test_known_technology(self):
        query = "explain the use of langchain in your stack"
        assert is_portfolio_relevant(query) is True

    def test_known_organisation(self):
        query = "what did you do at vk live"
        assert is_portfolio_relevant(query) is True

    # -- queries expected to be judged irrelevant ----------------------------

    def test_unrelated_query(self):
        query = "what is the weather in london"
        assert is_portfolio_relevant(query) is False

    def test_generic_question(self):
        query = "tell me a joke"
        assert is_portfolio_relevant(query) is False

    def test_empty_string(self):
        query = ""
        assert is_portfolio_relevant(query) is False

    # -- resume / tech-stack phrasings expected to be judged relevant --------

    def test_resume_intent_keywords_are_relevant(self):
        query = "tell me about his work experience"
        assert is_portfolio_relevant(query) is True

    def test_stt_typo_work_experience_is_still_relevant(self):
        # "walk experience" stands in for a mis-transcribed "work experience".
        query = "tell me about his walk experience"
        assert is_portfolio_relevant(query) is True

    def test_tech_stack_intent_is_relevant(self):
        query = "Could you tell me about his tech stack?"
        assert is_portfolio_relevant(query) is True

    def test_professional_setting_work_experience_is_relevant(self):
        query = "What work experience do you have in a professional setting"
        assert is_portfolio_relevant(query) is True

    def test_tech_stack_use_phrase_is_relevant(self):
        query = "What tech stack does he use"
        assert is_portfolio_relevant(query) is True