Spaces:

1337XCode
/

personabot-api

Running

personabot-api / tests /test_enumerate_query.py

GitHub Actions

Deploy 8e14626

8da917e 2 days ago

8.69 kB

	# backend/tests/test_enumerate_query.py
	# Unit tests for the enumeration query classifier (Fix 1) and
	# the portfolio-relevance helper (Fix 2 Rule 1).
	#
	# All tests are pure-Python; no network calls, no Qdrant, no embedder.

	import pytest
	from unittest.mock import AsyncMock, MagicMock, patch

	from app.pipeline.nodes.enumerate_query import (
	_has_enumeration_intent,
	_extract_source_types,
	make_enumerate_query_node,
	)
	from app.core.portfolio_context import is_portfolio_relevant

	# Patch target for LangGraph's stream writer, which requires a runnable context
	# that doesn't exist in unit tests.
	# The dotted path names `get_stream_writer` as looked up inside the
	# enumerate_query node module; the node tests pass it to `unittest.mock.patch`
	# so the lookup yields a MagicMock stand-in for the duration of the call.
	_WRITER_PATCH = "app.pipeline.nodes.enumerate_query.get_stream_writer"


	# ---------------------------------------------------------------------------
	# _has_enumeration_intent
	# ---------------------------------------------------------------------------


	class TestHasEnumerationIntent:
	    """Pin which query phrasings `_has_enumeration_intent` flags and which it ignores."""

	    # --- phrasings that must be flagged --------------------------------

	    def test_list_all_projects(self):
	        """'list all projects' is flagged as an enumeration request."""
	        flagged = _has_enumeration_intent("list all projects")
	        assert flagged is True

	    def test_list_projects_no_all(self):
	        """'list projects' is flagged even without the word 'all'."""
	        flagged = _has_enumeration_intent("list projects")
	        assert flagged is True

	    def test_show_all_blogs(self):
	        """'show all blog posts' is flagged."""
	        flagged = _has_enumeration_intent("show all blog posts")
	        assert flagged is True

	    def test_how_many_blogs(self):
	        """A 'how many ...' question is flagged."""
	        flagged = _has_enumeration_intent("how many blog posts do you have")
	        assert flagged is True

	    def test_count_projects(self):
	        """'count projects' is flagged."""
	        flagged = _has_enumeration_intent("count projects")
	        assert flagged is True

	    def test_enumerate_skills(self):
	        """'enumerate all skills' is flagged."""
	        flagged = _has_enumeration_intent("enumerate all skills")
	        assert flagged is True

	    def test_give_me_a_list_of(self):
	        """'give me a list of ...' is flagged."""
	        flagged = _has_enumeration_intent("give me a list of your projects")
	        assert flagged is True

	    def test_what_are_all_the_projects(self):
	        """'what are all the X' is flagged."""
	        # trailing-regex pattern: "what are all the X"
	        flagged = _has_enumeration_intent("what are all the projects")
	        assert flagged is True

	    def test_which_are_all_the_blogs(self):
	        """'which are all the X' is flagged."""
	        # Requires "all" keyword — the trailing regex gate prevents over-triggering.
	        flagged = _has_enumeration_intent("which are all the blog posts")
	        assert flagged is True

	    # --- phrasings that must NOT be flagged -----------------------------

	    def test_regular_how_query_no_intent(self):
	        """A 'how does X work' question is not flagged."""
	        flagged = _has_enumeration_intent("how does TextOps work")
	        assert flagged is False

	    def test_explain_query_no_intent(self):
	        """An 'explain ...' request is not flagged."""
	        flagged = _has_enumeration_intent("explain the architecture of PersonaBot")
	        assert flagged is False

	    def test_what_is_query_no_intent(self):
	        """A 'what is X' question is not flagged."""
	        flagged = _has_enumeration_intent("what is echo-echo")
	        assert flagged is False

	    def test_tell_me_about_no_intent(self):
	        """A 'tell me about ...' request is not flagged."""
	        flagged = _has_enumeration_intent("tell me about your background")
	        assert flagged is False

	    def test_empty_string(self):
	        """The empty query is not flagged."""
	        flagged = _has_enumeration_intent("")
	        assert flagged is False


	# ---------------------------------------------------------------------------
	# _extract_source_types
	# ---------------------------------------------------------------------------


	class TestExtractSourceTypes:
	    """Pin the source-type tokens `_extract_source_types` derives from a query."""

	    def test_projects(self):
	        """A query mentioning projects yields the "project" type."""
	        assert "project" in _extract_source_types("list all projects")

	    def test_blogs(self):
	        """A query mentioning blog posts yields the "blog" type."""
	        assert "blog" in _extract_source_types("show all blog posts")

	    def test_skills_cv(self):
	        """A query about skills yields the "cv" type."""
	        assert "cv" in _extract_source_types("list all your skills")

	    def test_generic_returns_empty(self):
	        """A query with no type token yields an empty list."""
	        # "everything" or "all" without a type token → [] meaning scroll all types
	        assert _extract_source_types("list everything") == []

	    def test_github_repos(self):
	        """A query mentioning github repos yields the "github" type."""
	        assert "github" in _extract_source_types("show all github repos")

	    def test_work_experience(self):
	        """A query about work experience yields the "cv" type."""
	        assert "cv" in _extract_source_types("list all work experience")


	# ---------------------------------------------------------------------------
	# make_enumerate_query_node
	# ---------------------------------------------------------------------------


	@pytest.mark.asyncio
	async def test_non_enumeration_query_passes_through():
	    """A non-enumeration query leaves is_enumeration_query False and never scrolls."""
	    # Stand-in vector store whose scroll call would return nothing if invoked.
	    vector_store = MagicMock()
	    vector_store.scroll_by_source_type.return_value = []

	    enumerate_node = make_enumerate_query_node(vector_store)
	    graph_state = {"query": "how does TextOps work", "retrieval_attempts": 0}

	    # Swap out the stream-writer lookup for the duration of the node call.
	    writer_stub = MagicMock()
	    with patch(_WRITER_PATCH, return_value=writer_stub):
	        outcome = enumerate_node(graph_state)

	    assert outcome["is_enumeration_query"] is False
	    # Vector store must NOT be called for normal queries (zero cost guarantee).
	    vector_store.scroll_by_source_type.assert_not_called()


	@pytest.mark.asyncio
	async def test_enumeration_query_sets_flag_and_populates_chunks():
	    """An enumeration query scrolls once, sets the flag, and returns both chunks."""
	    # Two chunks with distinct source titles, so both are expected in the result.
	    textops_chunk = {
	        "text": "TextOps is a CLI toolkit.",
	        "metadata": {
	            "source_title": "TextOps",
	            "source_type": "project",
	            "doc_id": "textops-1",
	        },
	    }
	    echo_chunk = {
	        "text": "Echo-Echo is a WebRTC demo.",
	        "metadata": {
	            "source_title": "Echo-Echo",
	            "source_type": "project",
	            "doc_id": "echo-1",
	        },
	    }
	    vector_store = MagicMock()
	    vector_store.scroll_by_source_type.return_value = [textops_chunk, echo_chunk]

	    enumerate_node = make_enumerate_query_node(vector_store)
	    graph_state = {"query": "list all projects", "retrieval_attempts": 0}

	    # Swap out the stream-writer lookup for the duration of the node call.
	    writer_stub = MagicMock()
	    with patch(_WRITER_PATCH, return_value=writer_stub):
	        outcome = enumerate_node(graph_state)

	    assert outcome["is_enumeration_query"] is True
	    assert len(outcome["reranked_chunks"]) == 2
	    vector_store.scroll_by_source_type.assert_called_once()


	@pytest.mark.asyncio
	async def test_enumeration_deduplicates_by_source_title():
	    """Chunks sharing one source_title collapse to a single entry in the result."""
	    # Same source_title, different doc_id: only one is expected to survive.
	    first_chunk = {
	        "text": "TextOps chunk 1",
	        "metadata": {
	            "source_title": "TextOps",
	            "source_type": "project",
	            "doc_id": "textops-1",
	        },
	    }
	    second_chunk = {
	        "text": "TextOps chunk 2",
	        "metadata": {
	            "source_title": "TextOps",
	            "source_type": "project",
	            "doc_id": "textops-2",
	        },
	    }
	    vector_store = MagicMock()
	    vector_store.scroll_by_source_type.return_value = [first_chunk, second_chunk]

	    enumerate_node = make_enumerate_query_node(vector_store)
	    graph_state = {"query": "list all projects", "retrieval_attempts": 0}

	    # Swap out the stream-writer lookup for the duration of the node call.
	    writer_stub = MagicMock()
	    with patch(_WRITER_PATCH, return_value=writer_stub):
	        outcome = enumerate_node(graph_state)

	    assert outcome["is_enumeration_query"] is True
	    assert len(outcome["reranked_chunks"]) == 1


	@pytest.mark.asyncio
	async def test_enumeration_empty_scroll_returns_not_found():
	    """An enumeration-style query whose scroll yields no chunks keeps the flag False."""
	    # The stand-in vector store returns an empty scroll result.
	    vector_store = MagicMock()
	    vector_store.scroll_by_source_type.return_value = []

	    enumerate_node = make_enumerate_query_node(vector_store)
	    graph_state = {"query": "list all projects", "retrieval_attempts": 0}

	    # Swap out the stream-writer lookup for the duration of the node call.
	    writer_stub = MagicMock()
	    with patch(_WRITER_PATCH, return_value=writer_stub):
	        outcome = enumerate_node(graph_state)

	    # With no chunks, the node does not commit to enumeration path; falls to RAG.
	    assert outcome["is_enumeration_query"] is False


	# ---------------------------------------------------------------------------
	# is_portfolio_relevant (Fix 2 Rule 1)
	# ---------------------------------------------------------------------------


	class TestIsPortfolioRelevant:
	    """Pin which queries `is_portfolio_relevant` accepts and which it rejects."""

	    # --- queries expected to be relevant --------------------------------

	    def test_known_project_name(self):
	        """A query naming 'textops' is relevant."""
	        relevant = is_portfolio_relevant("how does textops work")
	        assert relevant is True

	    def test_known_project_variant(self):
	        """The spaced variant 'echo echo' is relevant."""
	        relevant = is_portfolio_relevant("tell me about echo echo")
	        assert relevant is True

	    def test_known_technology(self):
	        """A query mentioning 'langchain' is relevant."""
	        relevant = is_portfolio_relevant("explain the use of langchain in your stack")
	        assert relevant is True

	    def test_known_organisation(self):
	        """A query mentioning 'vk live' is relevant."""
	        relevant = is_portfolio_relevant("what did you do at vk live")
	        assert relevant is True

	    def test_resume_intent_keywords_are_relevant(self):
	        """A question about work experience is relevant."""
	        relevant = is_portfolio_relevant("tell me about his work experience")
	        assert relevant is True

	    def test_stt_typo_work_experience_is_still_relevant(self):
	        """The 'walk experience' slip (an STT typo, per the test name) is still relevant."""
	        relevant = is_portfolio_relevant("tell me about his walk experience")
	        assert relevant is True

	    def test_tech_stack_intent_is_relevant(self):
	        """A mixed-case, punctuated tech-stack question is relevant."""
	        relevant = is_portfolio_relevant("Could you tell me about his tech stack?")
	        assert relevant is True

	    def test_professional_setting_work_experience_is_relevant(self):
	        """A longer work-experience question is relevant."""
	        relevant = is_portfolio_relevant(
	            "What work experience do you have in a professional setting"
	        )
	        assert relevant is True

	    def test_tech_stack_use_phrase_is_relevant(self):
	        """The 'what tech stack does he use' phrasing is relevant."""
	        relevant = is_portfolio_relevant("What tech stack does he use")
	        assert relevant is True

	    # --- queries expected to be rejected ---------------------------------

	    def test_unrelated_query(self):
	        """A weather question is not relevant."""
	        relevant = is_portfolio_relevant("what is the weather in london")
	        assert relevant is False

	    def test_generic_question(self):
	        """A request for a joke is not relevant."""
	        relevant = is_portfolio_relevant("tell me a joke")
	        assert relevant is False

	    def test_empty_string(self):
	        """The empty query is not relevant."""
	        relevant = is_portfolio_relevant("")
	        assert relevant is False