Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| """ | |
| Test TREC Integration - SysCRED | |
| ================================ | |
| Integration tests for TREC AP88-90 evidence retrieval. | |
| Tests: | |
| 1. TRECRetriever initialization | |
| 2. Evidence retrieval | |
| 3. Integration with VerificationSystem | |
| 4. Batch retrieval | |
| 5. Metrics evaluation | |
| (c) Dominique S. Loyer - PhD Thesis Prototype | |
| Citation Key: loyerEvaluationModelesRecherche2025 | |
| """ | |
| import sys | |
| import unittest | |
| from pathlib import Path | |
| # Add parent to path | |
| sys.path.insert(0, str(Path(__file__).parent.parent)) | |
| from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult | |
| from syscred.trec_dataset import TRECDataset, TRECTopic, SAMPLE_TOPICS | |
| from syscred.eval_metrics import EvaluationMetrics | |
| from syscred.ir_engine import IREngine | |
| class TestTRECRetriever(unittest.TestCase): | |
| """Tests for TRECRetriever class.""" | |
| def setUpClass(cls): | |
| """Set up retriever with sample corpus.""" | |
| cls.retriever = TRECRetriever(use_stemming=True, enable_prf=False) | |
| # Add sample corpus for testing | |
| cls.retriever.corpus = { | |
| "AP880101-0001": { | |
| "text": "Climate change is primarily caused by human activities, particularly the burning of fossil fuels.", | |
| "title": "Climate Science Report" | |
| }, | |
| "AP880101-0002": { | |
| "text": "The Earth's temperature has risen significantly over the past century due to greenhouse gas emissions.", | |
| "title": "Global Warming Study" | |
| }, | |
| "AP880102-0001": { | |
| "text": "Natural climate variations have occurred throughout Earth's history, including ice ages.", | |
| "title": "Climate History" | |
| }, | |
| "AP880102-0002": { | |
| "text": "Renewable energy sources like solar and wind can help reduce carbon emissions significantly.", | |
| "title": "Green Energy Solutions" | |
| }, | |
| "AP880103-0001": { | |
| "text": "Scientific consensus supports the theory that humans are the primary cause of recent climate change.", | |
| "title": "IPCC Summary" | |
| }, | |
| "AP890215-0001": { | |
| "text": "The presidential election campaign focused on economic issues and foreign policy.", | |
| "title": "Election Coverage" | |
| }, | |
| "AP890216-0001": { | |
| "text": "Stock markets rose sharply after positive economic indicators were released.", | |
| "title": "Financial News" | |
| }, | |
| } | |
| def test_retriever_initialization(self): | |
| """Test that retriever initializes correctly.""" | |
| self.assertIsNotNone(self.retriever) | |
| self.assertIsNotNone(self.retriever.ir_engine) | |
| self.assertEqual(len(self.retriever.corpus), 7) | |
| def test_evidence_retrieval(self): | |
| """Test evidence retrieval for a claim.""" | |
| result = self.retriever.retrieve_evidence( | |
| claim="Climate change is caused by human activities", | |
| k=3 | |
| ) | |
| self.assertIsInstance(result, RetrievalResult) | |
| self.assertGreater(len(result.evidences), 0) | |
| self.assertLessEqual(len(result.evidences), 3) | |
| # Check first evidence | |
| first = result.evidences[0] | |
| self.assertIsInstance(first, Evidence) | |
| self.assertTrue(first.doc_id.startswith("AP")) | |
| self.assertGreater(first.score, 0) | |
| self.assertEqual(first.rank, 1) | |
| def test_batch_retrieval(self): | |
| """Test batch evidence retrieval.""" | |
| claims = [ | |
| "Climate change is real", | |
| "Stock markets and economy", | |
| "Presidential election" | |
| ] | |
| results = self.retriever.batch_retrieve(claims, k=2) | |
| self.assertEqual(len(results), 3) | |
| for result in results: | |
| self.assertIsInstance(result, RetrievalResult) | |
| def test_statistics(self): | |
| """Test statistics collection.""" | |
| # Run a query first | |
| self.retriever.retrieve_evidence("test query", k=2) | |
| stats = self.retriever.get_statistics() | |
| self.assertIn("queries_processed", stats) | |
| self.assertIn("corpus_size", stats) | |
| self.assertGreater(stats["queries_processed"], 0) | |
| class TestTRECDataset(unittest.TestCase): | |
| """Tests for TRECDataset class.""" | |
| def test_sample_topics(self): | |
| """Test sample topics availability.""" | |
| self.assertIsNotNone(SAMPLE_TOPICS) | |
| self.assertGreater(len(SAMPLE_TOPICS), 0) | |
| # Check structure | |
| for topic_id, topic in SAMPLE_TOPICS.items(): | |
| self.assertIsInstance(topic, TRECTopic) | |
| self.assertTrue(topic.title) | |
| def test_dataset_initialization(self): | |
| """Test dataset initialization.""" | |
| dataset = TRECDataset() | |
| self.assertIsNotNone(dataset) | |
| self.assertEqual(len(dataset.topics), 0) | |
| self.assertEqual(len(dataset.qrels), 0) | |
| def test_topic_query_generation(self): | |
| """Test query generation from topics.""" | |
| dataset = TRECDataset() | |
| dataset.topics = SAMPLE_TOPICS.copy() | |
| short_queries = dataset.get_topic_queries(query_type="short") | |
| long_queries = dataset.get_topic_queries(query_type="long") | |
| self.assertEqual(len(short_queries), len(SAMPLE_TOPICS)) | |
| self.assertEqual(len(long_queries), len(SAMPLE_TOPICS)) | |
| class TestEvaluationMetrics(unittest.TestCase): | |
| """Tests for EvaluationMetrics class.""" | |
| def setUp(self): | |
| self.metrics = EvaluationMetrics() | |
| def test_precision_at_k(self): | |
| """Test P@K calculation.""" | |
| retrieved = ["doc1", "doc2", "doc3", "doc4", "doc5"] | |
| relevant = {"doc1", "doc3", "doc5"} | |
| p_at_3 = self.metrics.precision_at_k(retrieved, relevant, k=3) | |
| self.assertAlmostEqual(p_at_3, 2/3) # doc1 and doc3 in top 3 | |
| p_at_5 = self.metrics.precision_at_k(retrieved, relevant, k=5) | |
| self.assertAlmostEqual(p_at_5, 3/5) | |
| def test_recall_at_k(self): | |
| """Test R@K calculation.""" | |
| retrieved = ["doc1", "doc2", "doc3", "doc4", "doc5"] | |
| relevant = {"doc1", "doc3", "doc5", "doc7"} # 4 relevant, doc7 not retrieved | |
| r_at_5 = self.metrics.recall_at_k(retrieved, relevant, k=5) | |
| self.assertAlmostEqual(r_at_5, 3/4) # 3 of 4 relevant docs retrieved | |
| def test_average_precision(self): | |
| """Test AP calculation.""" | |
| retrieved = ["doc1", "doc2", "doc3", "doc4"] | |
| relevant = {"doc1", "doc3"} | |
| ap = self.metrics.average_precision(retrieved, relevant) | |
| # AP = (1/2) * (1/1 + 2/3) = 0.5 * 1.667 = 0.833 | |
| expected = (1.0 + 2/3) / 2 | |
| self.assertAlmostEqual(ap, expected, places=4) | |
| def test_reciprocal_rank(self): | |
| """Test MRR calculation.""" | |
| retrieved = ["doc2", "doc3", "doc1", "doc4"] | |
| relevant = {"doc1"} | |
| rr = self.metrics.reciprocal_rank(retrieved, relevant) | |
| self.assertAlmostEqual(rr, 1/3) # doc1 is at rank 3 | |
| class TestIREngine(unittest.TestCase): | |
| """Tests for IREngine class.""" | |
| def setUp(self): | |
| self.engine = IREngine(use_stemming=True) | |
| def test_preprocessing(self): | |
| """Test text preprocessing.""" | |
| text = "The quick brown fox JUMPS over the lazy dog!" | |
| processed = self.engine.preprocess(text) | |
| # Should be lowercase, no common stopwords | |
| self.assertNotIn("the", processed) | |
| self.assertTrue(processed.islower()) | |
| # Should contain content words | |
| self.assertIn("quick", processed) | |
| self.assertIn("brown", processed) | |
| def test_tfidf_calculation(self): | |
| """Test TF-IDF scoring (basic).""" | |
| # This tests the internal TF-IDF if pyserini not available | |
| self.assertIsNotNone(self.engine) | |
class TestVerificationSystemIntegration(unittest.TestCase):
    """Integration tests with VerificationSystem.

    If the verification system cannot be imported or constructed, every test
    in this class is reported as skipped instead of failing.
    """

    @classmethod
    def setUpClass(cls):
        """Initialize the system without ML models for speed.

        This has to be a classmethod: unittest invokes ``setUpClass()`` on the
        class with no explicit argument, so a plain function would fail with a
        ``TypeError`` and error out the whole class.

        Sets ``cls.skip`` to ``False`` on success. On any failure the reason
        is printed and ``cls.skip`` is set to ``True``.
        """
        try:
            # Imported lazily so a missing or broken module only affects this class.
            from syscred.verification_system import CredibilityVerificationSystem
            cls.system = CredibilityVerificationSystem(load_ml_models=False)
            cls.skip = False
        except Exception as e:
            # Deliberately broad: any import/construction problem means the
            # integration tests are skipped, not failed.
            print(f"Skipping integration tests: {e}")
            cls.skip = True

    def _skip_if_unavailable(self):
        """Skip the current test when setUpClass could not build the system."""
        if self.skip:
            self.skipTest("VerificationSystem not available")

    def test_system_has_retriever(self):
        """Test that the system exposes a ``trec_retriever`` attribute."""
        self._skip_if_unavailable()
        # Retriever might be None if no corpus configured, so only the
        # attribute's presence is checked.
        self.assertTrue(hasattr(self.system, 'trec_retriever'))

    def test_retrieve_evidence_method(self):
        """Test that ``retrieve_evidence`` returns a list."""
        self._skip_if_unavailable()
        # Should return an empty list if no corpus is configured.
        evidences = self.system.retrieve_evidence("test claim")
        self.assertIsInstance(evidences, list)

    def test_verify_with_evidence_method(self):
        """Test that ``verify_with_evidence`` returns the expected keys."""
        self._skip_if_unavailable()
        result = self.system.verify_with_evidence("Climate change is real")
        self.assertIn('claim', result)
        self.assertIn('evidences', result)
        self.assertIn('verification_verdict', result)
        self.assertIn('confidence', result)
if __name__ == "__main__":
    # Print a banner, then hand control to unittest's runner in verbose mode.
    rule = "=" * 60
    print(rule)
    print("SysCRED TREC Integration Tests")
    print(rule)
    unittest.main(verbosity=2)