"""
Evidence Verifier

Verifies that claims are supported by document evidence.
Cross-references extracted information with source documents.
"""

import re
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple

from loguru import logger
from pydantic import BaseModel, Field


# Words ignored when extracting key terms from a claim.
_STOP_WORDS = frozenset({
    "the", "a", "an", "is", "are", "was", "were", "be", "been",
    "being", "have", "has", "had", "do", "does", "did", "will",
    "would", "could", "should", "may", "might", "must", "shall",
    "can", "need", "dare", "ought", "used", "to", "of", "in",
    "for", "on", "with", "at", "by", "from", "as", "into",
    "through", "during", "before", "after", "above", "below",
    "between", "and", "but", "if", "or", "because", "until",
    "while",
})

# Tokenizer used by term extraction.
_WORD_PATTERN = re.compile(r'\b\w+\b')

# Negation cues used by the contradiction heuristic, compiled once.
_NEGATION_WORDS = ("not", "no", "never", "none", "without", "failed", "denied")
_NEGATION_PATTERNS = tuple(
    re.compile(rf'\b{word}\b') for word in _NEGATION_WORDS
)

# Maximum character distance between a negation cue and a claim term for the
# pair to count as a contradiction.
_NEGATION_WINDOW = 30


class EvidenceStrength(str, Enum):
    """Evidence strength levels."""

    STRONG = "strong"  # Directly quoted/stated
    MODERATE = "moderate"  # Implied or paraphrased
    WEAK = "weak"  # Tangentially related
    NONE = "none"  # No supporting evidence


class VerifierConfig(BaseModel):
    """Configuration for evidence verifier."""

    # Matching settings
    fuzzy_match: bool = Field(default=True, description="Enable fuzzy matching")
    case_sensitive: bool = Field(default=False, description="Case-sensitive matching")
    min_match_ratio: float = Field(
        default=0.6,
        ge=0.0,
        le=1.0,
        description="Minimum match ratio for fuzzy matching"
    )

    # Scoring: a combined score at or above a threshold earns that strength.
    strong_threshold: float = Field(default=0.9, ge=0.0, le=1.0)
    moderate_threshold: float = Field(default=0.7, ge=0.0, le=1.0)
    weak_threshold: float = Field(default=0.5, ge=0.0, le=1.0)

    # Processing
    max_evidence_per_claim: int = Field(default=5, ge=1)
    context_window: int = Field(default=100, description="Characters around match")


class EvidenceMatch(BaseModel):
    """A match between claim and evidence."""

    evidence_text: str
    match_score: float
    strength: EvidenceStrength

    # Location (chunk_id/page are copied from the chunk dict when present)
    chunk_id: Optional[str] = None
    page: Optional[int] = None
    position: Optional[int] = None

    # Context
    context_before: Optional[str] = None
    context_after: Optional[str] = None


class VerificationResult(BaseModel):
    """Result of evidence verification."""

    claim: str
    verified: bool
    strength: EvidenceStrength
    confidence: float

    # Evidence
    evidence_matches: List[EvidenceMatch]
    best_match: Optional[EvidenceMatch] = None

    # Analysis
    coverage_score: float  # How much of claim is covered
    contradiction_found: bool = False
    notes: Optional[str] = None


class EvidenceVerifier:
    """
    Verifies claims against document evidence.

    Features:
    - Text matching (exact and fuzzy)
    - Evidence strength scoring
    - Contradiction detection
    - Context extraction
    """

    def __init__(self, config: Optional[VerifierConfig] = None):
        """Initialize evidence verifier.

        Args:
            config: Verifier settings; a default VerifierConfig is used
                when omitted.
        """
        self.config = config or VerifierConfig()

    def verify_claim(
        self,
        claim: str,
        evidence_chunks: List[Dict[str, Any]],
    ) -> VerificationResult:
        """
        Verify a claim against evidence.

        Args:
            claim: The claim to verify
            evidence_chunks: List of evidence chunks with text (read from
                the "text" key; "chunk_id" and "page" are copied into
                matches when present)

        Returns:
            VerificationResult. ``verified`` is True only when the best
            match is STRONG or MODERATE, its score reaches
            ``moderate_threshold``, and no contradiction was detected.
        """
        if not claim or not evidence_chunks:
            return VerificationResult(
                # ``claim`` is declared as ``str`` on the model; normalise a
                # None claim so this early return cannot fail validation.
                claim=claim or "",
                verified=False,
                strength=EvidenceStrength.NONE,
                confidence=0.0,
                evidence_matches=[],
                coverage_score=0.0,
            )

        # Find matches in evidence
        matches: List[EvidenceMatch] = []
        for chunk in evidence_chunks:
            chunk_text = chunk.get("text", "")
            if not chunk_text:
                continue
            matches.extend(self._find_matches(claim, chunk_text, chunk))

        # Sort by score and take top matches
        matches.sort(key=lambda m: m.match_score, reverse=True)
        top_matches = matches[:self.config.max_evidence_per_claim]

        # Calculate overall scores from the best match
        if top_matches:
            best_match = top_matches[0]
            overall_strength = best_match.strength
            confidence = best_match.match_score
            coverage_score = self._calculate_coverage(claim, top_matches)
        else:
            best_match = None
            overall_strength = EvidenceStrength.NONE
            confidence = 0.0
            coverage_score = 0.0

        # Determine verification status
        verified = (
            overall_strength in (EvidenceStrength.STRONG, EvidenceStrength.MODERATE)
            and confidence >= self.config.moderate_threshold
        )

        # Check for contradictions
        contradiction_found = self._check_contradictions(claim, evidence_chunks)

        return VerificationResult(
            claim=claim,
            verified=verified and not contradiction_found,
            strength=overall_strength,
            confidence=confidence,
            evidence_matches=top_matches,
            best_match=best_match,
            coverage_score=coverage_score,
            contradiction_found=contradiction_found,
        )

    def verify_multiple(
        self,
        claims: List[str],
        evidence_chunks: List[Dict[str, Any]],
    ) -> List[VerificationResult]:
        """
        Verify multiple claims against evidence.

        Args:
            claims: List of claims to verify
            evidence_chunks: Evidence chunks

        Returns:
            List of VerificationResult, in the same order as ``claims``
        """
        return [self.verify_claim(claim, evidence_chunks) for claim in claims]

    def verify_extraction(
        self,
        extraction: Dict[str, Any],
        evidence_chunks: List[Dict[str, Any]],
    ) -> Dict[str, VerificationResult]:
        """
        Verify extracted fields as claims.

        Each non-None field is turned into the claim ``"<field>: <value>"``.

        Args:
            extraction: Dictionary of field -> value
            evidence_chunks: Evidence chunks

        Returns:
            Dictionary of field -> VerificationResult (fields whose value
            is None are omitted)
        """
        results: Dict[str, VerificationResult] = {}

        for field, value in extraction.items():
            if value is None:
                continue

            # Convert to claim
            claim = f"{field}: {value}"
            results[field] = self.verify_claim(claim, evidence_chunks)

        return results

    def _find_matches(
        self,
        claim: str,
        text: str,
        chunk: Dict[str, Any],
    ) -> List[EvidenceMatch]:
        """Find matches for claim in text.

        An exact substring hit yields a single STRONG match with score 1.0.
        Otherwise the claim's key terms are looked up individually (exactly,
        then fuzzily if enabled) and combined into one scored match.

        Args:
            claim: The claim being verified
            text: Text of a single evidence chunk
            chunk: The chunk dict the text came from

        Returns:
            A list with zero or one EvidenceMatch for this chunk
        """
        case_sensitive = self.config.case_sensitive

        # Normalize texts
        claim_normalized = claim if case_sensitive else claim.lower()
        text_normalized = text if case_sensitive else text.lower()

        # Try exact substring match. It already covers every term, so the
        # term-level pass is skipped to avoid a redundant second match that
        # would use up a max_evidence_per_claim slot.
        exact_pos = text_normalized.find(claim_normalized)
        if exact_pos != -1:
            return [
                self._create_match(
                    text, exact_pos, len(claim), chunk,
                    score=1.0,
                    strength=EvidenceStrength.STRONG
                )
            ]

        # Extract key terms from claim; keep their case in case-sensitive
        # mode so they can still be found in the un-lowered text.
        terms = self._extract_terms(claim_normalized, lowercase=not case_sensitive)
        if not terms:
            return []

        # Try term matching: (term, position, score) for each located term
        term_scores: List[Tuple[str, int, float]] = []
        for term in terms:
            pos = text_normalized.find(term)
            if pos != -1:
                term_scores.append((term, pos, 1.0))
            elif self.config.fuzzy_match:
                # Try fuzzy match
                fuzzy_score, fuzzy_pos = self._fuzzy_find(term, text_normalized)
                if fuzzy_score >= self.config.min_match_ratio:
                    term_scores.append((term, fuzzy_pos, fuzzy_score))

        if not term_scores:
            return []

        # Calculate combined score. Dividing by len(terms) rather than
        # len(term_scores) means unmatched terms pull the average down.
        avg_score = sum(s[2] for s in term_scores) / len(terms)
        coverage = len(term_scores) / len(terms)
        combined_score = (avg_score * 0.7) + (coverage * 0.3)

        strength = self._score_to_strength(combined_score)
        if strength == EvidenceStrength.NONE:
            return []

        # Anchor the match at the best-scoring term (first one on ties)
        best_term = max(term_scores, key=lambda t: t[2])
        return [
            self._create_match(
                text, best_term[1], len(best_term[0]), chunk,
                score=combined_score,
                strength=strength
            )
        ]

    def _score_to_strength(self, score: float) -> EvidenceStrength:
        """Map a combined score onto a strength level via the configured thresholds."""
        if score >= self.config.strong_threshold:
            return EvidenceStrength.STRONG
        if score >= self.config.moderate_threshold:
            return EvidenceStrength.MODERATE
        if score >= self.config.weak_threshold:
            return EvidenceStrength.WEAK
        return EvidenceStrength.NONE

    def _create_match(
        self,
        text: str,
        position: int,
        length: int,
        chunk: Dict[str, Any],
        score: float,
        strength: EvidenceStrength,
    ) -> EvidenceMatch:
        """Create an evidence match with context.

        Args:
            text: Original (un-normalized) chunk text
            position: Start offset of the match in ``text``
            length: Number of characters in the match
            chunk: Source chunk; "chunk_id" and "page" are copied if present
            score: Match score to record
            strength: Strength level to record

        Returns:
            EvidenceMatch whose context fields hold up to
            ``config.context_window`` characters on each side of the match
            (None when there is no surrounding text).
        """
        # Honour the configured window on both sides; a negative setting is
        # treated as "no context".
        window = max(0, self.config.context_window)
        match_end = position + length

        start = max(0, position - window)
        end = min(len(text), match_end + window)

        context_before = text[start:position]
        evidence_text = text[position:match_end]
        context_after = text[match_end:end]

        return EvidenceMatch(
            evidence_text=evidence_text,
            match_score=score,
            strength=strength,
            chunk_id=chunk.get("chunk_id"),
            page=chunk.get("page"),
            position=position,
            context_before=context_before or None,
            context_after=context_after or None,
        )

    def _extract_terms(self, text: str, lowercase: bool = True) -> List[str]:
        """Extract key terms from text.

        Tokenizes on word boundaries and drops stop words and tokens of two
        characters or fewer.

        Args:
            text: Text to tokenize
            lowercase: When True (default) the returned terms are
                lower-cased; when False the original case is kept. Stop-word
                filtering is case-insensitive either way.

        Returns:
            Terms in order of appearance (duplicates are kept)
        """
        source = text.lower() if lowercase else text
        words = _WORD_PATTERN.findall(source)
        return [w for w in words if len(w) > 2 and w.lower() not in _STOP_WORDS]

    def _fuzzy_find(self, term: str, text: str) -> Tuple[float, int]:
        """Find term in text with fuzzy matching.

        Slides a window of ``len(term)`` characters over ``text`` and scores
        each position by the fraction of characters that agree with ``term``
        at the same offset.

        Args:
            term: Term to look for
            text: Text to search

        Returns:
            (best_score, best_position); (0.0, 0) when the term is empty or
            longer than the text. The earliest position wins ties.
        """
        term_len = len(term)
        if term_len == 0 or term_len > len(text):
            return 0.0, 0

        best_score = 0.0
        best_pos = 0

        for i in range(len(text) - term_len + 1):
            window = text[i:i + term_len]
            agreeing = sum(1 for a, b in zip(term, window) if a == b)
            score = agreeing / term_len

            if score > best_score:
                best_score = score
                best_pos = i
                if agreeing == term_len:
                    # A perfect score cannot be beaten; stop scanning.
                    break

        return best_score, best_pos

    def _calculate_coverage(
        self,
        claim: str,
        matches: List[EvidenceMatch],
    ) -> float:
        """Calculate how much of the claim is covered by evidence.

        Args:
            claim: The claim being verified
            matches: Matches whose ``evidence_text`` is checked for claim terms

        Returns:
            Fraction of the claim's distinct key terms that appear in at
            least one match's evidence text (0.0 if the claim has no terms).
            The comparison is always case-insensitive.
        """
        claim_terms = set(self._extract_terms(claim.lower()))
        if not claim_terms:
            return 0.0

        covered_terms = set()
        for match in matches:
            match_terms = set(self._extract_terms(match.evidence_text.lower()))
            covered_terms.update(match_terms & claim_terms)

        return len(covered_terms) / len(claim_terms)

    def _check_contradictions(
        self,
        claim: str,
        evidence_chunks: List[Dict[str, Any]],
    ) -> bool:
        """Check if evidence contains contradictions to the claim.

        Heuristic: a contradiction is reported when a negation cue ("not",
        "no", "never", "none", "without", "failed", "denied") starts within
        30 characters of the start of any occurrence of a claim term in a
        chunk's text. Matching is case-insensitive.

        Negation cues that the claim itself contains are ignored, both as
        cues and as terms; otherwise evidence that simply repeats a negated
        claim would be flagged as contradicting it.

        Args:
            claim: The claim being verified
            evidence_chunks: Evidence chunks (text read from the "text" key)

        Returns:
            True if any chunk triggers the heuristic, else False
        """
        claim_lower = claim.lower()

        active_patterns = [
            pattern for pattern in _NEGATION_PATTERNS
            if not pattern.search(claim_lower)
        ]
        if not active_patterns:
            return False

        claim_terms = {
            term for term in self._extract_terms(claim_lower)
            if term not in _NEGATION_WORDS
        }
        if not claim_terms:
            return False

        for chunk in evidence_chunks:
            # ``or ""`` also covers chunks whose "text" is explicitly None.
            text = (chunk.get("text") or "").lower()
            if not text:
                continue

            negation_positions = [
                found.start()
                for pattern in active_patterns
                for found in pattern.finditer(text)
            ]
            if not negation_positions:
                continue

            # Check every occurrence of every term, not only the first.
            for term in claim_terms:
                term_pos = text.find(term)
                while term_pos != -1:
                    if any(
                        abs(neg_pos - term_pos) < _NEGATION_WINDOW
                        for neg_pos in negation_positions
                    ):
                        return True
                    term_pos = text.find(term, term_pos + 1)

        return False


# Global instance and factory
_evidence_verifier: Optional[EvidenceVerifier] = None


def get_evidence_verifier(
    config: Optional[VerifierConfig] = None,
) -> EvidenceVerifier:
    """Get or create singleton evidence verifier.

    Args:
        config: Used only when the singleton is first created. If a verifier
            already exists and a different config is passed, the existing
            instance is returned unchanged and a warning is logged.

    Returns:
        The shared EvidenceVerifier instance
    """
    global _evidence_verifier

    if _evidence_verifier is None:
        _evidence_verifier = EvidenceVerifier(config)
    elif config is not None and config != _evidence_verifier.config:
        logger.warning(
            "get_evidence_verifier() received a config but a verifier already "
            "exists; the new config is ignored. Call reset_evidence_verifier() "
            "first to apply it."
        )

    return _evidence_verifier


def reset_evidence_verifier() -> None:
    """Reset the global verifier instance."""
    global _evidence_verifier
    _evidence_verifier = None