# phi-coherence / phi_coherence.py
# bitsabhi's picture
# v3: Credibility Scoring
# 1813a42
#!/usr/bin/env python3
"""
φ-Coherence v3 — Credibility Scoring Engine
Detect fabrication patterns in ANY text — human or AI.
No knowledge base. No LLM calls. Pure mathematical pattern detection.
Core insight: Truth and fabrication have different structural fingerprints.
You don't need to know the facts to detect the fingerprints.
Use cases:
- AI hallucination detection
- Fake review detection
- Resume/essay inflation detection
- Marketing copy audit
- News article verification
- RAG quality filtering
Benchmark: 88% accuracy on 25 paragraph-level hallucination pairs.
https://github.com/0x-auth/bazinga-indeed
"""
import math
import re
import hashlib
from typing import Dict
from dataclasses import dataclass, asdict
from collections import Counter
# Golden ratio; used as the target consonant/vowel ratio in the φ-alignment score.
PHI = 1.618033988749895
# Reciprocal of PHI.
PHI_INVERSE = 1 / PHI
# Modulus of the α-seed check: a text is an α-seed when its SHA-256 hash % ALPHA == 0.
ALPHA = 137
@dataclass
class CoherenceMetrics:
    """Result record holding the credibility scores computed for one text.

    One overall score, one score per analysed dimension, the α-seed flag
    and the derived risk label.
    """

    # Overall credibility score (0-1)
    total_coherence: float
    # Specific vs vague sourcing
    attribution_quality: float
    # Appropriate certainty level
    confidence_calibration: float
    # "approximately" vs "exactly"
    qualifying_ratio: float
    # Claims don't contradict
    internal_consistency: float
    # Stays on topic
    topic_coherence: float
    # Reasoning makes sense
    causal_logic: float
    # Truth states what IS, not ISN'T
    negation_density: float
    # Plausibility of the numbers quoted in the text
    numerical_plausibility: float
    # Golden ratio text proportions
    phi_alignment: float
    # Information density
    semantic_density: float
    # Hash % 137 == 0
    is_alpha_seed: bool
    # SAFE / MODERATE / HIGH_RISK
    risk_level: str

    def to_dict(self) -> dict:
        """Return all fields as a plain dictionary keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping
class PhiCoherence:
    """
    φ-Coherence v3 — Credibility Scorer

    Scores a text on ten heuristic dimensions and combines them into a
    single weighted score:
    1. Vague Attribution — "Studies show..." without naming sources
    2. Confidence Miscalibration — Extreme certainty, stasis claims
    3. Qualifying Ratio — "approximately" vs "exactly/definitively"
    4. Internal Contradictions — Claims conflict within text
    5. Topic Drift — Subject changes mid-paragraph
    6. Nonsensical Causality — Teleological/absolute causal language
    7. Negation Density — Fabrication states what ISN'T, truth states what IS
    8. Numerical Plausibility — Roundness of numbers >= 100
    9. φ-Alignment — Golden ratio text proportions
    10. Semantic Density — Information content

    NOTE(review): most marker lists are matched with plain substring tests
    (``marker in text_lower``), so short markers also match inside longer
    words (e.g. 'all' inside "usually"). Left as-is because the thresholds
    appear tuned against this behaviour — confirm before tightening.
    """

    def __init__(self):
        # Weight of each dimension in the combined score; the weights sum to 1.0.
        self.weights = {
            'attribution': 0.18,
            'confidence': 0.16,
            'qualifying': 0.12,
            'consistency': 0.10,
            'topic': 0.11,
            'causal': 0.10,
            'negation': 0.08,
            'numerical': 0.05,
            'phi': 0.05,
            'density': 0.05,
        }
        # Memoised analysis results, keyed by a digest of the analysed text.
        self._cache: Dict[str, CoherenceMetrics] = {}

    @staticmethod
    def _cache_key(text: str) -> str:
        """Return the cache key for *text*: a SHA-256 digest of the WHOLE text.

        Hashing only a prefix would make long texts that share that prefix
        collide and receive each other's cached metrics.
        """
        return hashlib.sha256(text.encode()).hexdigest()

    def calculate(self, text: str) -> float:
        """Return the overall credibility score; 0.0 for empty/blank text."""
        if not text or not text.strip():
            return 0.0
        return self.analyze(text).total_coherence

    def analyze(self, text: str) -> "CoherenceMetrics":
        """Compute every dimension for *text* and return the full metrics.

        Empty or whitespace-only text yields a fixed HIGH_RISK record and is
        not cached. Other results are cached per text; the returned object
        is the cached instance itself.
        """
        if not text or not text.strip():
            return CoherenceMetrics(
                total_coherence=0.0,
                attribution_quality=0.0,
                confidence_calibration=0.0,
                qualifying_ratio=0.5,
                internal_consistency=0.5,
                topic_coherence=0.5,
                causal_logic=0.5,
                negation_density=0.5,
                numerical_plausibility=0.5,
                phi_alignment=0.0,
                semantic_density=0.0,
                is_alpha_seed=False,
                risk_level="HIGH_RISK",
            )

        cache_key = self._cache_key(text)
        if cache_key in self._cache:
            return self._cache[cache_key]

        # Core credibility dimensions. Confidence is computed first because
        # the attribution score is capped by it.
        confidence = self._detect_confidence_calibration(text)
        attribution = self._detect_attribution_quality(text, confidence)
        qualifying = self._detect_qualifying_ratio(text)
        consistency = self._detect_internal_consistency(text)
        topic = self._detect_topic_coherence(text)
        causal = self._detect_causal_logic(text)
        negation = self._detect_negation_density(text)
        numerical = self._detect_numerical_plausibility(text)

        # Legacy dimensions
        phi = self._calculate_phi_alignment(text)
        density = self._calculate_semantic_density(text)
        is_alpha = self._is_alpha_seed(text)

        # Combined score
        total = (
            self.weights['attribution'] * attribution +
            self.weights['confidence'] * confidence +
            self.weights['qualifying'] * qualifying +
            self.weights['consistency'] * consistency +
            self.weights['topic'] * topic +
            self.weights['causal'] * causal +
            self.weights['negation'] * negation +
            self.weights['numerical'] * numerical +
            self.weights['phi'] * phi +
            self.weights['density'] * density
        )
        if is_alpha:
            # α-seed texts get a 3% boost, capped at 1.0.
            total = min(1.0, total * 1.03)

        risk = "SAFE" if total >= 0.58 else ("MODERATE" if total >= 0.40 else "HIGH_RISK")

        metrics = CoherenceMetrics(
            total_coherence=round(total, 4),
            attribution_quality=round(attribution, 4),
            confidence_calibration=round(confidence, 4),
            qualifying_ratio=round(qualifying, 4),
            internal_consistency=round(consistency, 4),
            topic_coherence=round(topic, 4),
            causal_logic=round(causal, 4),
            negation_density=round(negation, 4),
            numerical_plausibility=round(numerical, 4),
            phi_alignment=round(phi, 4),
            semantic_density=round(density, 4),
            is_alpha_seed=is_alpha,
            risk_level=risk,
        )

        self._cache[cache_key] = metrics
        if len(self._cache) > 1000:
            # Dicts keep insertion order, so the first 500 keys are the oldest.
            for k in list(self._cache.keys())[:500]:
                del self._cache[k]
        return metrics

    # ============================================================
    # CORE DIMENSIONS
    # ============================================================

    def _detect_attribution_quality(self, text: str, confidence_score: float) -> float:
        """
        Vague vs specific sourcing.

        Counts how many vague-attribution patterns and how many
        specific-attribution patterns occur (each pattern counts once).
        Overclaim override: if *confidence_score* is very low, the
        attribution score is capped.
        """
        text_lower = text.lower()
        vague_patterns = [
            r'\bstudies\s+(show|suggest|indicate|have\s+found|demonstrate)\b',
            r'\bresearch(ers)?\s+(show|suggest|indicate|believe|have\s+found)\b',
            r'\bexperts?\s+(say|believe|think|argue|suggest|agree)\b',
            r'\bscientists?\s+(say|believe|think|argue|suggest|agree)\b',
            r'\bit\s+is\s+(widely|generally|commonly|universally)\s+(known|believed|accepted|thought)\b',
            r'\b(some|many|several|various|numerous)\s+(people|experts|scientists|researchers|sources)\b',
            r'\ba\s+(recent|new|groundbreaking|landmark)\s+study\b',
            r'\baccording\s+to\s+(some|many|several|various)\b',
            r'\b(sources|reports)\s+(say|suggest|indicate|confirm)\b',
        ]
        # (pattern, flags). Patterns that rely on capitalisation (proper
        # nouns, acronyms) are matched case-sensitively; otherwise ordinary
        # words such as "who", "nature" or "according to some" would count as
        # named sources. Plain phrases stay case-insensitive.
        specific_patterns = [
            (r'\b[Aa]ccording\s+to\s+[A-Z][a-z]+', 0),
            (r'\b(19|20)\d{2}\b', 0),
            (r'\bpublished\s+in\b', re.IGNORECASE),
            (r'\b[A-Z][a-z]+\s+(University|Institute|Laboratory|Center|Centre)\b', 0),
            (r'\b(NASA|WHO|CDC|CERN|NIH|MIT|IPCC|IEEE|Nature|Science|Lancet)\b', 0),
            (r'\b(discovered|measured|observed|documented|recorded)\s+by\b', re.IGNORECASE),
            (r'\b(first|originally)\s+(described|proposed|discovered|measured)\b', re.IGNORECASE),
        ]

        vague = sum(1 for p in vague_patterns if re.search(p, text_lower))
        specific = sum(1 for p, flags in specific_patterns if re.search(p, text, flags))

        if vague + specific == 0:
            raw_score = 0.55
        elif vague > 0 and specific == 0:
            raw_score = max(0.10, 0.30 - vague * 0.05)
        else:
            raw_score = 0.25 + 0.75 * (specific / (vague + specific))

        # OVERCLAIM OVERRIDE
        if confidence_score < 0.25:
            raw_score = min(raw_score, 0.45)
        elif confidence_score < 0.35:
            raw_score = min(raw_score, 0.55)
        return raw_score

    def _detect_confidence_calibration(self, text: str) -> float:
        """Detect overclaiming, extreme certainty, stasis claims.

        Each marker or pattern counts at most once; the first matching rule
        in the cascade below decides the score.
        """
        text_lower = text.lower()
        extreme_certain = [
            'definitively proven', 'conclusively identified',
            'every scientist agrees', 'unanimously accepted',
            'completely solved', 'has never been questioned',
            'absolutely impossible', 'without any doubt',
            'beyond all question', 'it is an undeniable fact',
            'already achieved', 'permanently settled',
            'now permanently', 'now completely solved',
            'conclusively demonstrated', 'passed every',
            'without exception', 'ever discovered',
        ]
        moderate_certain = [
            'definitely', 'certainly', 'clearly', 'obviously',
            'undoubtedly', 'proven', 'always', 'never',
            'impossible', 'guaranteed', 'absolutely', 'undeniably',
        ]
        hedging = [
            'might', 'could', 'possibly', 'perhaps', 'maybe',
            'believed to', 'thought to', 'may have', 'some say',
            'it seems', 'apparently', 'might possibly',
            'could potentially', 'somewhat',
        ]
        calibrated = [
            'approximately', 'roughly', 'about', 'estimated',
            'measured', 'observed', 'documented', 'recorded',
            'according to', 'based on',
        ]
        stasis_patterns = [
            r'has\s+(remained|stayed|been)\s+(unchanged|constant|the\s+same)',
            r'has\s+never\s+been\s+(questioned|challenged|disputed|changed|updated)',
            r'(unchanged|constant)\s+for\s+\d+\s+(years|decades|centuries)',
            r'has\s+not\s+changed\s+(since|in|for)',
        ]

        ext = sum(1 for m in extreme_certain if m in text_lower)
        mod = sum(1 for m in moderate_certain if m in text_lower)
        hed = sum(1 for m in hedging if m in text_lower)
        cal = sum(1 for m in calibrated if m in text_lower)
        stasis = sum(1 for p in stasis_patterns if re.search(p, text_lower))

        if stasis >= 2:
            return 0.10
        if stasis >= 1:
            # A single stasis claim counts as one extreme-certainty marker.
            ext += 1
        if ext >= 2:
            return 0.10
        if ext >= 1:
            return 0.20
        if mod >= 3:
            return 0.25
        if mod > 0 and hed > 0:
            return 0.30
        if hed >= 3 and cal == 0:
            return 0.30
        if cal > 0:
            return 0.70 + min(0.20, cal * 0.05)
        return 0.55

    def _detect_qualifying_ratio(self, text: str) -> float:
        """Ratio of qualifying language to absolutist language.

        The ratio picks a base score; the base is then capped when there is
        one or more absolutist marker per '.'-delimited sentence.
        """
        text_lower = text.lower()
        qualifiers = [
            'approximately', 'roughly', 'about', 'estimated', 'generally',
            'typically', 'usually', 'often', 'one of the', 'some of',
            'can vary', 'tends to', 'on average', 'in most cases',
            'is thought to', 'is believed to', 'suggests that',
            'remains', 'continues to', 'open question',
            'at least', 'up to', 'as many as', 'no fewer than',
            'as much as', 'under certain', 'depending on',
            'may vary', 'not yet', 'not well established',
        ]
        absolutes = [
            'exactly', 'precisely', 'definitively', 'conclusively', 'every',
            'all', 'none', 'always', 'never', 'only', 'impossible',
            'certain', 'undeniably', 'unanimously', 'completely',
            'perfectly', 'entirely', 'totally', 'purely',
            'already achieved', 'permanently settled', 'permanently',
            'without exception', 'single most', 'ever discovered',
            'ever devised', 'now completely', 'now permanently',
            'for life', 'guarantee',
        ]

        q = sum(1 for m in qualifiers if m in text_lower)
        a = sum(1 for m in absolutes if m in text_lower)
        if q + a == 0:
            return 0.55

        ratio = q / (q + a)
        if ratio >= 0.8:
            base = 0.85
        elif ratio >= 0.6:
            base = 0.70
        elif ratio >= 0.4:
            base = 0.55
        elif ratio >= 0.2:
            base = 0.35
        else:
            base = 0.15

        # Density penalty
        n_sentences = max(1, len([s for s in text.split('.') if s.strip()]))
        abs_density = a / n_sentences
        if abs_density >= 2.0:
            base = min(base, 0.15)
        elif abs_density >= 1.0:
            base = min(base, 0.25)
        return base

    def _detect_internal_consistency(self, text: str) -> float:
        """Check for contradictory claims within text.

        Adjacent sentences that share at least two topic words but lean in
        opposite polarity (positive vs negative word counts) count as a
        contradiction, unless the second sentence contains a contrast word.
        """
        sentences = re.split(r'[.!?]+', text)
        sentences = [s.strip().lower() for s in sentences if len(s.strip()) > 10]
        if len(sentences) < 2:
            return 0.55

        positive = {'increase', 'more', 'greater', 'higher', 'effective', 'can',
                    'does', 'absorb', 'produce', 'create', 'generate', 'release'}
        # NOTE(review): 'does not' can never match — sentences are split on
        # whitespace, so only single tokens are compared against this set.
        negative = {'decrease', 'less', 'lower', 'smaller', 'ineffective', 'cannot',
                    'does not', "doesn't", 'prevent', 'block', 'no', 'not'}
        contrast = {'however', 'but', 'although', 'despite', 'nevertheless', 'whereas', 'yet'}

        contradictions = 0
        for first, second in zip(sentences, sentences[1:]):
            wa = set(first.split())
            wb = set(second.split())
            topic_overlap = (wa & wb) - positive - negative - contrast
            topic_overlap -= {'the', 'a', 'an', 'is', 'are', 'of', 'in', 'to', 'and', 'or', 'this', 'that'}
            if len(topic_overlap) >= 2:
                pa, na = len(wa & positive), len(wa & negative)
                pb, nb = len(wb & positive), len(wb & negative)
                if (pa > na and nb > pb) or (na > pa and pb > nb):
                    if not (wb & contrast):
                        contradictions += 1

        if contradictions >= 2:
            return 0.15
        if contradictions == 1:
            return 0.30
        return 0.55

    def _detect_topic_coherence(self, text: str) -> float:
        """Vocabulary overlap between sentences — detect topic drift.

        Uses the Jaccard overlap of non-stop-word sets for each pair of
        adjacent sentences.
        """
        sentences = re.split(r'[.!?]+', text)
        sentences = [s.strip() for s in sentences if len(s.strip()) > 5]
        if len(sentences) < 2:
            return 0.55

        stops = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been',
                 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will',
                 'would', 'shall', 'should', 'may', 'might', 'must', 'can',
                 'could', 'of', 'in', 'to', 'for', 'with', 'on', 'at', 'by',
                 'from', 'and', 'or', 'but', 'not', 'that', 'this', 'it', 'its',
                 'as', 'if', 'than', 'so', 'which', 'who', 'what', 'when',
                 'where', 'how', 'all', 'each', 'every', 'both', 'few', 'more',
                 'most', 'other', 'some', 'such', 'no', 'only', 'very'}

        def cw(s):
            # Content words: lower-cased whitespace tokens minus stop words.
            return set(s.lower().split()) - stops

        all_cw = [cw(s) for s in sentences]
        pairs = []
        for i in range(len(all_cw) - 1):
            if all_cw[i] and all_cw[i + 1]:
                union = all_cw[i] | all_cw[i + 1]
                if union:
                    pairs.append(len(all_cw[i] & all_cw[i + 1]) / len(union))

        if not pairs:
            return 0.55

        avg = sum(pairs) / len(pairs)
        if len(pairs) >= 2:
            # One near-disjoint pair next to a clearly related pair signals
            # an abrupt topic change.
            if min(pairs) < 0.02 and max(pairs) > 0.08:
                return 0.20
        if avg < 0.03:
            return 0.25
        return min(0.85, 0.30 + avg * 4)

    def _detect_causal_logic(self, text: str) -> float:
        """Structural causal reasoning check.

        Fabricated-commercial phrases are checked first, then nonsensical
        causal phrases, then ordinary causal connectives.
        """
        text_lower = text.lower()
        good = ['because', 'therefore', 'this is why', 'as a result',
                'which causes', 'leading to', 'due to', 'since',
                'consequently', 'which means', 'which is why']
        nonsense = [
            'directly killing all', 'seek out and destroy every',
            'decide to change their', 'choose which traits to develop',
            'within just a few generations, entirely new',
            'the chemicals are working to eliminate',
            'this process requires no', 'occurs primarily at night',
        ]
        fabricated_commercial = [
            'currently selling', 'currently available', 'on the market',
            'already being used', 'can be purchased', 'are now selling',
            'provides zero-latency', 'zero-latency connections',
            'will develop telekinetic', 'unlock the remaining',
            'reverse aging', 'cure any', 'more effective than all',
            'permanently boost', 'guarantee protection',
            'can permanently', 'reverse tooth decay',
        ]

        g = sum(1 for m in good if m in text_lower)
        n = sum(1 for m in nonsense if m in text_lower)
        fab = sum(1 for m in fabricated_commercial if m in text_lower)

        if fab >= 2:
            return 0.10
        if fab >= 1:
            return 0.25
        if n >= 2:
            return 0.10
        if n >= 1:
            return 0.25
        if g >= 2:
            return 0.75
        if g >= 1:
            return 0.65
        return 0.55

    def _detect_negation_density(self, text: str) -> float:
        """
        Truth states what IS. Fabrication states what ISN'T.
        High negation density is a fabrication signal.

        Density is the number of distinct negation patterns found per ten
        words (with a floor of ten words).
        """
        text_lower = text.lower()
        words = text_lower.split()
        n_words = len(words)
        if n_words == 0:
            return 0.55

        negation_patterns = [
            r'\brequires?\s+no\b', r'\bhas\s+no\b', r'\bwith\s+no\b',
            r'\bis\s+not\b', r'\bare\s+not\b', r'\bwas\s+not\b',
            r'\bdoes\s+not\b', r'\bdo\s+not\b', r'\bcannot\b',
            r"\bcan't\b", r"\bdon't\b", r"\bdoesn't\b", r"\bisn't\b",
            r"\baren't\b", r"\bwasn't\b", r"\bweren't\b", r"\bhasn't\b",
            r"\bhaven't\b", r"\bwon't\b", r"\bshouldn't\b",
            r'\bnever\b', r'\bnone\b', r'\bneither\b',
            r'\bno\s+(evidence|proof|basis|support|reason)\b',
        ]
        neg_count = sum(1 for p in negation_patterns if re.search(p, text_lower))
        density = neg_count / max(1, n_words / 10)

        if density >= 1.5:
            return 0.15
        elif density >= 1.0:
            return 0.30
        elif density >= 0.5:
            return 0.45
        elif density > 0:
            return 0.55
        else:
            return 0.65

    def _detect_numerical_plausibility(self, text: str) -> float:
        """Round number detection.

        Scores each number >= 100 by its share of trailing zeros and
        averages the scores; smaller numbers and zero are ignored. Returns
        the neutral 0.55 when fewer than two numbers are found or none
        qualifies. No leading-digit (Benford) test is performed here.
        """
        numbers = re.findall(r'\b(\d+(?:,\d{3})*(?:\.\d+)?)\b', text)
        nc = [n.replace(',', '') for n in numbers
              if n.replace(',', '').replace('.', '').isdigit()]
        if len(nc) < 2:
            return 0.55

        scores = []
        for ns in nc:
            try:
                n = float(ns)
            except ValueError:
                continue
            if n == 0:
                continue
            if n >= 100:
                s = str(int(n))
                tz = len(s) - len(s.rstrip('0'))
                roundness = tz / len(s)
                scores.append(0.35 if roundness > 0.6 else (0.50 if roundness > 0.4 else 0.70))
        return sum(scores) / len(scores) if scores else 0.55

    # ============================================================
    # LEGACY DIMENSIONS
    # ============================================================

    def _calculate_phi_alignment(self, text: str) -> float:
        """Score how close the consonant/vowel ratio is to PHI (60%) and how
        close the average token length is to 5 characters (40%).

        Returns 0.3 when the text contains no vowels.
        """
        vowels = sum(1 for c in text.lower() if c in 'aeiou')
        consonants = sum(1 for c in text.lower() if c.isalpha() and c not in 'aeiou')
        if vowels == 0:
            return 0.3
        ratio = consonants / vowels
        phi_score = 1.0 - min(1.0, abs(ratio - PHI) / PHI)

        words = text.split()
        if len(words) >= 2:
            avg = sum(len(w) for w in words) / len(words)
            ls = 1.0 - min(1.0, abs(avg - 5.0) / 5.0)
        else:
            ls = 0.5
        return phi_score * 0.6 + ls * 0.4

    def _calculate_semantic_density(self, text: str) -> float:
        """Average of the unique-token ratio and a length score that peaks at
        an average token length of 5.5 characters; 0.0 for text with no tokens.
        """
        words = text.split()
        if not words:
            return 0.0
        ur = len(set(w.lower() for w in words)) / len(words)
        avg = sum(len(w) for w in words) / len(words)
        ls = 1.0 - min(1.0, abs(avg - 5.5) / 5.5)
        return ur * 0.5 + ls * 0.5

    def _is_alpha_seed(self, text: str) -> bool:
        """Return True when the SHA-256 hash of *text*, as an integer, is divisible by ALPHA."""
        return int(hashlib.sha256(text.encode()).hexdigest(), 16) % ALPHA == 0
# Singleton: module-level scorer instance used by the score() and analyze()
# convenience functions below.
_coherence = PhiCoherence()
def score(text: str) -> float:
    """Return the overall credibility score (0-1) of *text*.

    Delegates to the module-level scorer's ``calculate``.
    """
    overall = _coherence.calculate(text)
    return overall
def analyze(text: str) -> CoherenceMetrics:
    """Return the full per-dimension credibility metrics for *text*.

    Delegates to the module-level scorer's ``analyze``.
    """
    metrics = _coherence.analyze(text)
    return metrics
def is_alpha_seed(text: str) -> bool:
    """Tell whether *text* is an α-SEED (SHA-256 hash % 137 == 0)."""
    digest = hashlib.sha256(text.encode()).digest()
    # Big-endian integer of the raw digest equals the hex digest parsed in base 16.
    hash_value = int.from_bytes(digest, "big")
    return hash_value % ALPHA == 0