Spaces:

samir72
/

Multi-Agent-Research-Paper-Analysis-System

Sleeping

Multi-Agent-Research-Paper-Analysis-System / tests / test_schema_validators.py

GitHub Actions

Clean sync from GitHub - no large files in history

aca8ab4 3 months ago

8.18 kB

	"""
	Unit tests for Pydantic schema validators.
	Tests the field_validator decorators in utils/schemas.py.
	"""
	import pytest
	from datetime import datetime

	from utils.schemas import Analysis, ConsensusPoint, Contradiction, SynthesisResult


	class TestAnalysisValidators:
	    """Checks how the Analysis schema cleans up its list-valued fields."""

	    def test_citations_with_nested_empty_list(self):
	        """An empty list sitting among citation strings is expected to vanish."""
	        raw_citations = ["Citation 1", [], "Citation 2"]  # stray [] in the middle
	        result = Analysis(
	            paper_id="test_id",
	            methodology="Test methodology",
	            key_findings=["Finding 1"],
	            conclusions="Test conclusions",
	            limitations=["Limit 1"],
	            citations=raw_citations,
	            main_contributions=["Contribution 1"],
	            confidence_score=0.8,
	        )

	        # Only the real citations remain, in their original order.
	        assert result.citations == ["Citation 1", "Citation 2"]

	    def test_citations_with_deeply_nested_lists(self):
	        """Values wrapped in two or three list levels are expected to come out flat."""
	        payload = {
	            "paper_id": "test_id",
	            "methodology": "Test",
	            "key_findings": [["Nested finding"]],
	            "conclusions": "Test",
	            "limitations": [[["Triple nested"]]],
	            "citations": [[["Deep citation"]]],
	            "main_contributions": [],
	            "confidence_score": 0.5,
	        }
	        result = Analysis(**payload)

	        assert result.key_findings == ["Nested finding"]
	        assert result.limitations == ["Triple nested"]
	        assert result.citations == ["Deep citation"]

	    def test_mixed_types_are_normalized(self):
	        """None, empty strings and integers mixed into list fields are tolerated."""
	        result = Analysis(
	            paper_id="test_id",
	            methodology="Test",
	            key_findings=["Finding", None, 123, ""],
	            conclusions="Test",
	            limitations=[456, "Limit"],
	            citations=["Citation", None, ""],
	            confidence_score=0.7,
	        )

	        # Expectation: None / "" entries are dropped and ints become strings.
	        expected_by_field = {
	            "key_findings": ["Finding", "123"],
	            "limitations": ["456", "Limit"],
	            "citations": ["Citation"],
	        }
	        for field_name, wanted in expected_by_field.items():
	            assert getattr(result, field_name) == wanted

	    def test_string_converted_to_list(self):
	        """A bare string given for a list field is expected to become a one-item list."""
	        lone_finding = "Single finding"
	        lone_limitation = "Single limitation"
	        result = Analysis(
	            paper_id="test_id",
	            methodology="Test",
	            key_findings=lone_finding,  # str passed instead of a list
	            conclusions="Test",
	            limitations=lone_limitation,  # str passed instead of a list
	            citations=[],
	            confidence_score=0.6,
	        )

	        assert result.key_findings == [lone_finding]
	        assert result.limitations == [lone_limitation]


	class TestConsensusPointValidators:
	    """Checks list-field cleanup on the ConsensusPoint schema."""

	    def test_supporting_papers_with_nested_lists(self):
	        """Nested and empty lists in supporting_papers/citations are expected to flatten."""
	        payload = {
	            "statement": "Test consensus",
	            "supporting_papers": ["paper1", [], ["paper2"]],
	            "citations": ["Citation 1", [["Nested citation"]]],
	            "confidence": 0.9,
	        }
	        point = ConsensusPoint(**payload)

	        assert point.supporting_papers == ["paper1", "paper2"]
	        assert point.citations == ["Citation 1", "Nested citation"]

	    def test_empty_and_none_values_filtered(self):
	        """None and empty-string entries are expected to be removed from the lists."""
	        papers_in = ["paper1", None, "", "paper2"]
	        citations_in = ["Citation", None]
	        point = ConsensusPoint(
	            statement="Test",
	            supporting_papers=papers_in,
	            citations=citations_in,
	            confidence=0.8,
	        )

	        # Only the non-empty string entries are kept.
	        assert point.supporting_papers == ["paper1", "paper2"]
	        assert point.citations == ["Citation"]


	class TestContradictionValidators:
	    """Checks list-field cleanup on the Contradiction schema."""

	    def test_papers_lists_with_nested_values(self):
	        """papers_a, papers_b and citations are expected to be flattened."""
	        side_a = ["paper1", [], "paper2"]
	        side_b = [["paper3"], "paper4"]
	        conflict = Contradiction(
	            topic="Test topic",
	            viewpoint_a="View A",
	            papers_a=side_a,
	            viewpoint_b="View B",
	            papers_b=side_b,
	            citations=["Citation 1", [["Nested"]]],
	            confidence=0.7,
	        )

	        expected_by_field = {
	            "papers_a": ["paper1", "paper2"],
	            "papers_b": ["paper3", "paper4"],
	            "citations": ["Citation 1", "Nested"],
	        }
	        for field_name, wanted in expected_by_field.items():
	            assert getattr(conflict, field_name) == wanted

	    def test_mixed_types_normalized(self):
	        """Integers and None inside the papers lists are expected to be normalized."""
	        payload = {
	            "topic": "Test",
	            "viewpoint_a": "A",
	            "papers_a": ["paper1", 123, None],
	            "viewpoint_b": "B",
	            "papers_b": [456, "paper2"],
	            "citations": ["Citation"],
	            "confidence": 0.6,
	        }
	        conflict = Contradiction(**payload)

	        # Ints are expected back as strings; the None entry is expected to be gone.
	        assert conflict.papers_a == ["paper1", "123"]
	        assert conflict.papers_b == ["456", "paper2"]


	class TestSynthesisResultValidators:
	    """Checks list-field cleanup on the SynthesisResult schema."""

	    def test_research_gaps_with_nested_lists(self):
	        """research_gaps and papers_analyzed are expected to be flattened and filtered."""
	        gaps_in = ["Gap 1", [["Nested gap"]], None]
	        papers_in = ["paper1", [], "paper2"]
	        outcome = SynthesisResult(
	            consensus_points=[],
	            contradictions=[],
	            research_gaps=gaps_in,
	            summary="Test summary",
	            confidence_score=0.8,
	            papers_analyzed=papers_in,
	        )

	        assert outcome.research_gaps == ["Gap 1", "Nested gap"]
	        assert outcome.papers_analyzed == ["paper1", "paper2"]

	    def test_string_converted_to_list(self):
	        """Bare strings given for list fields are expected to become one-item lists."""
	        payload = {
	            "consensus_points": [],
	            "contradictions": [],
	            "research_gaps": "Single gap",  # str passed instead of a list
	            "summary": "Test",
	            "confidence_score": 0.7,
	            "papers_analyzed": "paper1",  # str passed instead of a list
	        }
	        outcome = SynthesisResult(**payload)

	        assert outcome.research_gaps == ["Single gap"]
	        assert outcome.papers_analyzed == ["paper1"]


	class TestValidatorsWithRealWorldData:
	    """Feeds Analysis the kind of untidy payloads an LLM response may contain."""

	    def test_llm_returns_empty_arrays_within_citations(self):
	        """Reproduce the reported bug: empty lists embedded in the list fields."""
	        # Shape of the reported input: ["citation 1", [], "citation 2"]
	        payload = {
	            "paper_id": "2303.08710v1",
	            "methodology": "Deep learning approach",
	            "key_findings": ["95% accuracy", [], "Outperforms baselines"],
	            "conclusions": "Novel method works well",
	            "limitations": ["Limited dataset", []],
	            "citations": ["Methodology section", [], "Results section"],
	            "main_contributions": ["Novel architecture"],
	            "confidence_score": 0.85,
	        }
	        result = Analysis(**payload)

	        # Construction must succeed rather than raise a Pydantic validation error.
	        assert isinstance(result, Analysis)
	        assert result.citations == ["Methodology section", "Results section"]
	        assert result.key_findings == ["95% accuracy", "Outperforms baselines"]
	        assert result.limitations == ["Limited dataset"]

	    def test_llm_returns_mixed_malformed_data(self):
	        """Combine nesting, None, blank strings and ints in a single payload."""
	        messy_findings = [[], "Finding", None, [["Nested"]], "", " ", 123]
	        messy_limitations = [[["Deep"]], None, "Limit", []]
	        messy_citations = ["Citation", [[], []], None, ""]
	        messy_contributions = [None, [], "Contribution", [["Deep contrib"]]]
	        result = Analysis(
	            paper_id="test_id",
	            methodology="Test",
	            key_findings=messy_findings,
	            conclusions="Test",
	            limitations=messy_limitations,
	            citations=messy_citations,
	            main_contributions=messy_contributions,
	            confidence_score=0.5,
	        )

	        # Every field is expected to end up as a flat list of non-blank strings.
	        expected_by_field = {
	            "key_findings": ["Finding", "Nested", "123"],
	            "limitations": ["Deep", "Limit"],
	            "citations": ["Citation"],
	            "main_contributions": ["Contribution", "Deep contrib"],
	        }
	        for field_name, wanted in expected_by_field.items():
	            assert getattr(result, field_name) == wanted


	if __name__ == "__main__":
	    # Running this file directly hands it to pytest with verbose output.
	    cli_args = [__file__, "-v"]
	    pytest.main(cli_args)