# syscred_duplicate/syscred/test_trec_integration.py
# Dominique Loyer
# Deploy SysCRED with PyTorch
# e70050b
# -*- coding: utf-8 -*-
"""
Test TREC Integration - SysCRED
================================
Integration tests for TREC AP88-90 evidence retrieval.
Tests:
1. TRECRetriever initialization
2. Evidence retrieval
3. Integration with VerificationSystem
4. Batch retrieval
5. Metrics evaluation
(c) Dominique S. Loyer - PhD Thesis Prototype
Citation Key: loyerEvaluationModelesRecherche2025
"""
import sys
import unittest
from pathlib import Path
# Add parent to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from syscred.trec_retriever import TRECRetriever, Evidence, RetrievalResult
from syscred.trec_dataset import TRECDataset, TRECTopic, SAMPLE_TOPICS
from syscred.eval_metrics import EvaluationMetrics
from syscred.ir_engine import IREngine
class TestTRECRetriever(unittest.TestCase):
    """Exercise TRECRetriever against a small in-memory sample corpus."""

    @classmethod
    def setUpClass(cls):
        """Create one shared retriever and attach a seven-document corpus."""
        retriever = TRECRetriever(use_stemming=True, enable_prf=False)
        cls.retriever = retriever

        # (doc_id, title, text) records; five climate-related documents and
        # two unrelated ones (election, finance).
        documents = [
            (
                "AP880101-0001",
                "Climate Science Report",
                "Climate change is primarily caused by human activities, particularly the burning of fossil fuels.",
            ),
            (
                "AP880101-0002",
                "Global Warming Study",
                "The Earth's temperature has risen significantly over the past century due to greenhouse gas emissions.",
            ),
            (
                "AP880102-0001",
                "Climate History",
                "Natural climate variations have occurred throughout Earth's history, including ice ages.",
            ),
            (
                "AP880102-0002",
                "Green Energy Solutions",
                "Renewable energy sources like solar and wind can help reduce carbon emissions significantly.",
            ),
            (
                "AP880103-0001",
                "IPCC Summary",
                "Scientific consensus supports the theory that humans are the primary cause of recent climate change.",
            ),
            (
                "AP890215-0001",
                "Election Coverage",
                "The presidential election campaign focused on economic issues and foreign policy.",
            ),
            (
                "AP890216-0001",
                "Financial News",
                "Stock markets rose sharply after positive economic indicators were released.",
            ),
        ]
        retriever.corpus = {
            doc_id: {"text": text, "title": title}
            for doc_id, title, text in documents
        }

    def test_retriever_initialization(self):
        """The shared retriever exists, has an IR engine and holds 7 documents."""
        retriever = self.retriever
        self.assertIsNotNone(retriever)
        self.assertIsNotNone(retriever.ir_engine)
        self.assertEqual(len(retriever.corpus), 7)

    def test_evidence_retrieval(self):
        """A single claim yields between 1 and k ranked Evidence items."""
        outcome = self.retriever.retrieve_evidence(
            claim="Climate change is caused by human activities",
            k=3,
        )
        self.assertIsInstance(outcome, RetrievalResult)

        hit_count = len(outcome.evidences)
        self.assertGreater(hit_count, 0)
        self.assertLessEqual(hit_count, 3)

        # The top hit must be a well-formed, positively scored, rank-1 item.
        top_hit = outcome.evidences[0]
        self.assertIsInstance(top_hit, Evidence)
        self.assertTrue(top_hit.doc_id.startswith("AP"))
        self.assertGreater(top_hit.score, 0)
        self.assertEqual(top_hit.rank, 1)

    def test_batch_retrieval(self):
        """batch_retrieve returns one RetrievalResult per submitted claim."""
        claims = ["Climate change is real", "Stock markets and economy", "Presidential election"]
        batch = self.retriever.batch_retrieve(claims, k=2)

        self.assertEqual(len(batch), 3)
        for entry in batch:
            self.assertIsInstance(entry, RetrievalResult)

    def test_statistics(self):
        """Statistics expose the expected keys and count processed queries."""
        # Issue one query so the processed-query counter is non-zero.
        self.retriever.retrieve_evidence("test query", k=2)

        stats = self.retriever.get_statistics()
        for expected_key in ("queries_processed", "corpus_size"):
            self.assertIn(expected_key, stats)
        self.assertGreater(stats["queries_processed"], 0)
class TestTRECDataset(unittest.TestCase):
    """Checks for TRECDataset and the bundled SAMPLE_TOPICS mapping."""

    def test_sample_topics(self):
        """SAMPLE_TOPICS is non-empty and every value is a titled TRECTopic."""
        self.assertIsNotNone(SAMPLE_TOPICS)
        self.assertGreater(len(SAMPLE_TOPICS), 0)

        # Structural check on each topic; the keys are not inspected.
        for topic in SAMPLE_TOPICS.values():
            self.assertIsInstance(topic, TRECTopic)
            self.assertTrue(topic.title)

    def test_dataset_initialization(self):
        """A freshly constructed dataset starts with no topics and no qrels."""
        fresh = TRECDataset()
        self.assertIsNotNone(fresh)
        self.assertEqual(len(fresh.topics), 0)
        self.assertEqual(len(fresh.qrels), 0)

    def test_topic_query_generation(self):
        """Both the short and the long query types yield one query per topic."""
        dataset = TRECDataset()
        dataset.topics = SAMPLE_TOPICS.copy()

        short_queries = dataset.get_topic_queries(query_type="short")
        long_queries = dataset.get_topic_queries(query_type="long")

        for generated in (short_queries, long_queries):
            self.assertEqual(len(generated), len(SAMPLE_TOPICS))
class TestEvaluationMetrics(unittest.TestCase):
    """Checks EvaluationMetrics against small hand-computed rankings."""

    def setUp(self):
        """Give every test its own EvaluationMetrics instance."""
        self.metrics = EvaluationMetrics()

    def test_precision_at_k(self):
        """P@3 and P@5 on a five-document ranking with three relevant docs."""
        ranking = ["doc1", "doc2", "doc3", "doc4", "doc5"]
        relevant_docs = {"doc1", "doc3", "doc5"}

        # Top 3 holds doc1 and doc3 -> 2 of 3.
        precision_3 = self.metrics.precision_at_k(ranking, relevant_docs, k=3)
        self.assertAlmostEqual(precision_3, 2 / 3)

        # Top 5 holds all three relevant docs -> 3 of 5.
        precision_5 = self.metrics.precision_at_k(ranking, relevant_docs, k=5)
        self.assertAlmostEqual(precision_5, 3 / 5)

    def test_recall_at_k(self):
        """R@5 when one of the four relevant docs (doc7) is never retrieved."""
        ranking = ["doc1", "doc2", "doc3", "doc4", "doc5"]
        relevant_docs = {"doc1", "doc3", "doc5", "doc7"}

        recall_5 = self.metrics.recall_at_k(ranking, relevant_docs, k=5)
        # 3 of the 4 relevant docs appear in the ranking.
        self.assertAlmostEqual(recall_5, 3 / 4)

    def test_average_precision(self):
        """AP with relevant docs at ranks 1 and 3."""
        ranking = ["doc1", "doc2", "doc3", "doc4"]
        relevant_docs = {"doc1", "doc3"}

        observed = self.metrics.average_precision(ranking, relevant_docs)

        # AP = (1/2) * (1/1 + 2/3) ~= 0.833
        expected = (1.0 + 2 / 3) / 2
        self.assertAlmostEqual(observed, expected, places=4)

    def test_reciprocal_rank(self):
        """Reciprocal rank when the only relevant doc sits at rank 3."""
        ranking = ["doc2", "doc3", "doc1", "doc4"]
        relevant_docs = {"doc1"}

        observed = self.metrics.reciprocal_rank(ranking, relevant_docs)
        self.assertAlmostEqual(observed, 1 / 3)
class TestIREngine(unittest.TestCase):
    """Checks for the IREngine class."""

    def setUp(self):
        """Build a stemming-enabled engine for each test."""
        self.engine = IREngine(use_stemming=True)

    def test_preprocessing(self):
        """Preprocessing lowercases, drops "the" and keeps content words."""
        sentence = "The quick brown fox JUMPS over the lazy dog!"
        processed = self.engine.preprocess(sentence)

        # Expect lowercase output with the common stopword removed.
        self.assertNotIn("the", processed)
        self.assertTrue(processed.islower())

        # Content words must survive preprocessing.
        for content_word in ("quick", "brown"):
            self.assertIn(content_word, processed)

    def test_tfidf_calculation(self):
        """Placeholder for TF-IDF scoring: only checks the engine exists."""
        # Intended to cover the internal TF-IDF path used when pyserini is
        # not available; no scoring is asserted yet.
        self.assertIsNotNone(self.engine)
class TestVerificationSystemIntegration(unittest.TestCase):
    """Integration tests with VerificationSystem.

    The system is created once per class with ``load_ml_models=False``. If the
    import or the construction fails, every test in the class is skipped and
    the skip message carries the underlying error, so a genuine constructor
    bug remains visible in the test report instead of only in stdout.
    """

    @classmethod
    def setUpClass(cls):
        """Initialize system without ML models for speed.

        Sets ``cls.skip`` (bool) and ``cls.skip_reason`` (str or None); on
        success also sets ``cls.system``.
        """
        try:
            from syscred.verification_system import CredibilityVerificationSystem
            cls.system = CredibilityVerificationSystem(load_ml_models=False)
            cls.skip = False
            cls.skip_reason = None
        except Exception as e:
            # Deliberately broad: any failure to import or build the system
            # turns these tests into skips rather than errors. The reason is
            # kept so it can be reported with each skipped test.
            print(f"Skipping integration tests: {e}")
            cls.skip = True
            cls.skip_reason = f"{type(e).__name__}: {e}"

    def setUp(self):
        """Skip the current test when the system could not be initialized."""
        if self.skip:
            self.skipTest(
                f"VerificationSystem not available ({self.skip_reason})"
            )

    def test_system_has_retriever(self):
        """Test that system has TREC retriever."""
        # Retriever might be None if no corpus configured
        self.assertTrue(hasattr(self.system, 'trec_retriever'))

    def test_retrieve_evidence_method(self):
        """Test retrieve_evidence method."""
        # Should return empty list if no corpus
        evidences = self.system.retrieve_evidence("test claim")
        self.assertIsInstance(evidences, list)

    def test_verify_with_evidence_method(self):
        """Test verify_with_evidence method."""
        result = self.system.verify_with_evidence("Climate change is real")
        for expected_key in ('claim', 'evidences', 'verification_verdict', 'confidence'):
            self.assertIn(expected_key, result)
if __name__ == "__main__":
    # Print a framed banner, then hand over to unittest in verbose mode.
    rule = "=" * 60
    for banner_line in (rule, "SysCRED TREC Integration Tests", rule):
        print(banner_line)
    unittest.main(verbosity=2)