Spaces:

bitsabhi
/

phi-coherence

Running

App Files Files Community

phi-coherence / phi_coherence.py

bitsabhi

v3: Credibility Scoring

1813a42 5 days ago

raw

history blame contribute delete

21 kB

	#!/usr/bin/env python3
	"""
	φ-Coherence v3 — Credibility Scoring Engine

	Detect fabrication patterns in ANY text — human or AI.
	No knowledge base. No LLM calls. Pure mathematical pattern detection.

	Core insight: Truth and fabrication have different structural fingerprints.
	You don't need to know the facts to detect the fingerprints.

	Use cases:
	- AI hallucination detection
	- Fake review detection
	- Resume/essay inflation detection
	- Marketing copy audit
	- News article verification
	- RAG quality filtering

	Benchmark: 88% accuracy on 25 paragraph-level hallucination pairs.

	https://github.com/0x-auth/bazinga-indeed
	"""

	import math
	import re
	import hashlib
	from typing import Dict
	from dataclasses import dataclass, asdict
	from collections import Counter

	# Modulus used by the alpha-seed check (sha256(text) % ALPHA == 0).
	ALPHA = 137

	# Golden ratio and its reciprocal; PHI is the target consonant/vowel ratio
	# in the phi-alignment dimension.
	PHI = 1.618033988749895
	PHI_INVERSE = 1.0 / PHI


	@dataclass
	class CoherenceMetrics:
	    """Per-dimension credibility scores for one analysed text.

	    Field order is part of the interface: instances are also built
	    positionally.
	    """

	    # Weighted overall credibility score (0-1).
	    total_coherence: float
	    # Specific vs. vague sourcing.
	    attribution_quality: float
	    # Whether the level of certainty is appropriate.
	    confidence_calibration: float
	    # Qualifying ("approximately") vs. absolutist ("exactly") wording.
	    qualifying_ratio: float
	    # Claims do not contradict each other.
	    internal_consistency: float
	    # Text stays on one topic.
	    topic_coherence: float
	    # Causal reasoning makes sense.
	    causal_logic: float
	    # Truth states what IS, not what ISN'T.
	    negation_density: float
	    # Numbers look natural rather than suspiciously round.
	    numerical_plausibility: float
	    # Golden-ratio text proportions.
	    phi_alignment: float
	    # Information density.
	    semantic_density: float
	    # True when hash % 137 == 0.
	    is_alpha_seed: bool
	    # One of SAFE / MODERATE / HIGH_RISK.
	    risk_level: str

	    def to_dict(self) -> dict:
	        """Return all fields as a plain ``dict`` keyed by field name."""
	        as_mapping = asdict(self)
	        return as_mapping


	class PhiCoherence:
	    """
	    φ-Coherence v3 — Credibility Scorer

	    Scores a text on ten heuristic dimensions and blends them into a single
	    weighted credibility score between 0 and 1:

	    1. Vague Attribution — "Studies show..." without naming sources
	    2. Confidence Miscalibration — Extreme certainty, stasis claims
	    3. Qualifying Ratio — "approximately" vs "exactly/definitively"
	    4. Internal Contradictions — Claims conflict within text
	    5. Topic Drift — Subject changes mid-paragraph
	    6. Nonsensical Causality — Teleological/absolute causal language
	    7. Negation Density — Fabrication states what ISN'T, truth states what IS
	    8. Numerical Plausibility — Roundness of the numbers quoted
	    9. φ-Alignment — Golden ratio text proportions
	    10. Semantic Density — Information content

	    Every detector is purely lexical (fixed phrase lists and regular
	    expressions); no external knowledge is consulted.
	    """

	    def __init__(self):
	        # Contribution of each dimension to the total score; sums to 1.0.
	        self.weights = {
	            'attribution': 0.18,
	            'confidence': 0.16,
	            'qualifying': 0.12,
	            'consistency': 0.10,
	            'topic': 0.11,
	            'causal': 0.10,
	            'negation': 0.08,
	            'numerical': 0.05,
	            'phi': 0.05,
	            'density': 0.05,
	        }
	        # Memoised results, keyed by a digest of the analysed text.
	        self._cache: Dict[str, "CoherenceMetrics"] = {}

	    def calculate(self, text: str) -> float:
	        """Return only the overall credibility score (0-1) for *text*.

	        Empty or whitespace-only text scores 0.0.
	        """
	        if not text or not text.strip():
	            return 0.0
	        return self.analyze(text).total_coherence

	    def analyze(self, text: str) -> "CoherenceMetrics":
	        """Run every detector on *text* and return the full metrics record.

	        Empty or whitespace-only text yields a fixed HIGH_RISK record with a
	        total of 0. Results are memoised; the returned object is the cached
	        instance, so callers should treat it as read-only.
	        """
	        if not text or not text.strip():
	            return CoherenceMetrics(
	                0, 0, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0, False, "HIGH_RISK"
	            )

	        # Key on the FULL text. Hashing only a prefix would make two long
	        # texts with a common opening share one cache entry and return each
	        # other's metrics.
	        cache_key = hashlib.sha256(text.encode()).hexdigest()
	        cached = self._cache.get(cache_key)
	        if cached is not None:
	            return cached

	        # Core credibility dimensions. Confidence is computed first because
	        # the attribution score is capped when confidence is very low.
	        confidence = self._detect_confidence_calibration(text)
	        attribution = self._detect_attribution_quality(text, confidence)
	        qualifying = self._detect_qualifying_ratio(text)
	        consistency = self._detect_internal_consistency(text)
	        topic = self._detect_topic_coherence(text)
	        causal = self._detect_causal_logic(text)
	        negation = self._detect_negation_density(text)
	        numerical = self._detect_numerical_plausibility(text)

	        # Legacy dimensions
	        phi = self._calculate_phi_alignment(text)
	        density = self._calculate_semantic_density(text)
	        is_alpha = self._is_alpha_seed(text)

	        # Weighted blend of all ten dimensions.
	        total = (
	            self.weights['attribution'] * attribution +
	            self.weights['confidence'] * confidence +
	            self.weights['qualifying'] * qualifying +
	            self.weights['consistency'] * consistency +
	            self.weights['topic'] * topic +
	            self.weights['causal'] * causal +
	            self.weights['negation'] * negation +
	            self.weights['numerical'] * numerical +
	            self.weights['phi'] * phi +
	            self.weights['density'] * density
	        )

	        # Alpha-seed texts receive a 3% bonus, clamped to 1.0.
	        if is_alpha:
	            total = min(1.0, total * 1.03)

	        if total >= 0.58:
	            risk = "SAFE"
	        elif total >= 0.40:
	            risk = "MODERATE"
	        else:
	            risk = "HIGH_RISK"

	        metrics = CoherenceMetrics(
	            total_coherence=round(total, 4),
	            attribution_quality=round(attribution, 4),
	            confidence_calibration=round(confidence, 4),
	            qualifying_ratio=round(qualifying, 4),
	            internal_consistency=round(consistency, 4),
	            topic_coherence=round(topic, 4),
	            causal_logic=round(causal, 4),
	            negation_density=round(negation, 4),
	            numerical_plausibility=round(numerical, 4),
	            phi_alignment=round(phi, 4),
	            semantic_density=round(density, 4),
	            is_alpha_seed=is_alpha,
	            risk_level=risk,
	        )

	        self._cache[cache_key] = metrics
	        if len(self._cache) > 1000:
	            # dicts preserve insertion order, so this evicts the 500 oldest
	            # entries once the cache grows past 1000.
	            for stale_key in list(self._cache)[:500]:
	                del self._cache[stale_key]
	        return metrics

	    # ============================================================
	    # SHARED HELPERS
	    # ============================================================

	    @staticmethod
	    def _count_markers(text_lower, markers):
	        """Count how many *markers* occur in *text_lower* as whole words/phrases.

	        Each marker is counted at most once. Matching is bounded by non-word
	        characters on both sides so that, for example, "all" does not fire
	        inside "usually" and "never" does not fire inside "nevertheless".
	        Inflected forms therefore only match when listed explicitly. Runs of
	        whitespace inside a multi-word marker match any whitespace in the text.
	        """
	        hits = 0
	        for marker in markers:
	            phrase = r'\s+'.join(re.escape(part) for part in marker.split())
	            if re.search(r'(?<!\w)' + phrase + r'(?!\w)', text_lower):
	                hits += 1
	        return hits

	    # ============================================================
	    # CORE DIMENSIONS
	    # ============================================================

	    def _detect_attribution_quality(self, text: str, confidence_score: float) -> float:
	        """
	        Vague vs specific sourcing.

	        Counts how many vague-attribution patterns and how many
	        specific-attribution patterns occur (each pattern at most once) and
	        scores the share of specific ones.

	        Overclaim override: if *confidence_score* is very low, the
	        attribution score is capped (0.45 below 0.25, 0.55 below 0.35).
	        """
	        text_lower = text.lower()

	        vague_patterns = [
	            r'\bstudies\s+(show|suggest|indicate|have\s+found|demonstrate)\b',
	            r'\bresearch(ers)?\s+(show|suggest|indicate|believe|have\s+found)\b',
	            r'\bexperts?\s+(say|believe|think|argue|suggest|agree)\b',
	            r'\bscientists?\s+(say|believe|think|argue|suggest|agree)\b',
	            r'\bit\s+is\s+(widely|generally|commonly|universally)\s+(known|believed|accepted|thought)\b',
	            r'\b(some|many|several|various|numerous)\s+(people|experts|scientists|researchers|sources)\b',
	            r'\ba\s+(recent|new|groundbreaking|landmark)\s+study\b',
	            r'\baccording\s+to\s+(some|many|several|various)\b',
	            r'\b(sources|reports)\s+(say|suggest|indicate|confirm)\b',
	        ]

	        # Keyword cues: letter case carries no information, so they are
	        # searched in the lower-cased text.
	        specific_keyword_patterns = [
	            r'\b(19|20)\d{2}\b',
	            r'\bpublished\s+in\b',
	            r'\b(discovered|measured|observed|documented|recorded)\s+by\b',
	            r'\b(first|originally)\s+(described|proposed|discovered|measured)\b',
	        ]

	        # Proper-noun cues: these MUST see the original capitalisation.
	        # Matching them case-insensitively would turn the pronoun "who" into
	        # the WHO and "according to some" into a named source.
	        specific_proper_noun_patterns = [
	            r'\b(?i:according\s+to)\s+[A-Z][a-z]+',
	            r'\b[A-Z][a-z]+\s+(University|Institute|Laboratory|Center|Centre)\b',
	            r'\b(NASA|WHO|CDC|CERN|NIH|MIT|IPCC|IEEE|Nature|Science|Lancet)\b',
	        ]

	        vague = sum(1 for p in vague_patterns if re.search(p, text_lower))
	        specific = (
	            sum(1 for p in specific_keyword_patterns if re.search(p, text_lower))
	            + sum(1 for p in specific_proper_noun_patterns if re.search(p, text))
	        )

	        if vague + specific == 0:
	            # No attribution signal either way: neutral score.
	            raw_score = 0.55
	        elif specific == 0:
	            # Only vague sourcing: the more of it, the lower the score.
	            raw_score = max(0.10, 0.30 - vague * 0.05)
	        else:
	            raw_score = 0.25 + 0.75 * (specific / (vague + specific))

	        # OVERCLAIM OVERRIDE
	        if confidence_score < 0.25:
	            raw_score = min(raw_score, 0.45)
	        elif confidence_score < 0.35:
	            raw_score = min(raw_score, 0.55)

	        return raw_score

	    def _detect_confidence_calibration(self, text: str) -> float:
	        """Detect overclaiming, extreme certainty, stasis claims.

	        Returns a score from 0.10 (heavy overclaiming) up to 0.90 (several
	        calibrated markers and no overclaiming); 0.55 is the neutral value.
	        Markers are matched as whole words/phrases.
	        """
	        text_lower = text.lower()

	        extreme_certain = [
	            'definitively proven', 'conclusively identified',
	            'every scientist agrees', 'unanimously accepted',
	            'completely solved', 'has never been questioned',
	            'absolutely impossible', 'without any doubt',
	            'beyond all question', 'it is an undeniable fact',
	            'already achieved', 'permanently settled',
	            'now permanently', 'now completely solved',
	            'conclusively demonstrated', 'passed every',
	            'without exception', 'ever discovered',
	        ]

	        moderate_certain = [
	            'definitely', 'certainly', 'clearly', 'obviously',
	            'undoubtedly', 'proven', 'always', 'never',
	            'impossible', 'guaranteed', 'absolutely', 'undeniably',
	        ]

	        hedging = [
	            'might', 'could', 'possibly', 'perhaps', 'maybe',
	            'believed to', 'thought to', 'may have', 'some say',
	            'it seems', 'apparently', 'might possibly',
	            'could potentially', 'somewhat',
	        ]

	        calibrated = [
	            'approximately', 'roughly', 'about', 'estimated',
	            'measured', 'observed', 'documented', 'recorded',
	            'according to', 'based on',
	        ]

	        stasis_patterns = [
	            r'has\s+(remained|stayed|been)\s+(unchanged|constant|the\s+same)',
	            r'has\s+never\s+been\s+(questioned|challenged|disputed|changed|updated)',
	            r'(unchanged|constant)\s+for\s+\d+\s+(years|decades|centuries)',
	            r'has\s+not\s+changed\s+(since|in|for)',
	        ]

	        ext = self._count_markers(text_lower, extreme_certain)
	        mod = self._count_markers(text_lower, moderate_certain)
	        hed = self._count_markers(text_lower, hedging)
	        cal = self._count_markers(text_lower, calibrated)
	        stasis = sum(1 for p in stasis_patterns if re.search(p, text_lower))

	        if stasis >= 2:
	            return 0.10
	        if stasis >= 1:
	            # A single stasis claim counts as one extreme-certainty marker.
	            ext += 1

	        if ext >= 2:
	            return 0.10
	        if ext >= 1:
	            return 0.20
	        if mod >= 3:
	            return 0.25
	        if mod > 0 and hed > 0:
	            # Certainty and hedging mixed together.
	            return 0.30
	        if hed >= 3 and cal == 0:
	            # Heavy hedging with nothing measurable behind it.
	            return 0.30
	        if cal > 0:
	            return 0.70 + min(0.20, cal * 0.05)
	        return 0.55

	    def _detect_qualifying_ratio(self, text: str) -> float:
	        """Ratio of qualifying language to absolutist language.

	        The share of qualifiers among all matched markers is bucketed into a
	        base score (0.15-0.85); a high number of absolutes per sentence then
	        caps that score. Returns 0.55 when no marker is present. Markers are
	        matched as whole words/phrases.
	        """
	        text_lower = text.lower()

	        qualifiers = [
	            'approximately', 'roughly', 'about', 'estimated', 'generally',
	            'typically', 'usually', 'often', 'one of the', 'some of',
	            'can vary', 'tends to', 'on average', 'in most cases',
	            'is thought to', 'is believed to', 'suggests that',
	            'remains', 'continues to', 'open question',
	            'at least', 'up to', 'as many as', 'no fewer than',
	            'as much as', 'under certain', 'depending on',
	            'may vary', 'not yet', 'not well established',
	        ]

	        absolutes = [
	            'exactly', 'precisely', 'definitively', 'conclusively', 'every',
	            'all', 'none', 'always', 'never', 'only', 'impossible',
	            'certain', 'undeniably', 'unanimously', 'completely',
	            'perfectly', 'entirely', 'totally', 'purely',
	            'already achieved', 'permanently settled', 'permanently',
	            'without exception', 'single most', 'ever discovered',
	            'ever devised', 'now completely', 'now permanently',
	            'for life', 'guarantee',
	        ]

	        q = self._count_markers(text_lower, qualifiers)
	        a = self._count_markers(text_lower, absolutes)

	        if q + a == 0:
	            return 0.55

	        ratio = q / (q + a)

	        if ratio >= 0.8:
	            base = 0.85
	        elif ratio >= 0.6:
	            base = 0.70
	        elif ratio >= 0.4:
	            base = 0.55
	        elif ratio >= 0.2:
	            base = 0.35
	        else:
	            base = 0.15

	        # Density penalty: many absolutes per sentence cap the score no
	        # matter how many qualifiers accompany them.
	        n_sentences = max(1, len([s for s in text.split('.') if s.strip()]))
	        abs_density = a / n_sentences
	        if abs_density >= 2.0:
	            base = min(base, 0.15)
	        elif abs_density >= 1.0:
	            base = min(base, 0.25)

	        return base

	    def _detect_internal_consistency(self, text: str) -> float:
	        """Check for contradictory claims within text.

	        Two adjacent sentences count as a contradiction when they share at
	        least two topic words, their positive/negative polarity flips, and
	        the second sentence carries no contrast word ("however", "but", ...).
	        Returns 0.55 (none), 0.30 (one) or 0.15 (two or more).
	        """
	        sentences = [
	            s.strip().lower()
	            for s in re.split(r'[.!?]+', text)
	            if len(s.strip()) > 10
	        ]
	        if len(sentences) < 2:
	            return 0.55

	        positive = {'increase', 'more', 'greater', 'higher', 'effective', 'can',
	                    'does', 'absorb', 'produce', 'create', 'generate', 'release'}
	        # Single tokens only: a two-word entry such as 'does not' can never
	        # equal one token, and 'not' already covers that case.
	        negative = {'decrease', 'less', 'lower', 'smaller', 'ineffective', 'cannot',
	                    "doesn't", 'prevent', 'block', 'no', 'not'}
	        contrast = {'however', 'but', 'although', 'despite', 'nevertheless', 'whereas', 'yet'}
	        filler = {'the', 'a', 'an', 'is', 'are', 'of', 'in', 'to', 'and', 'or', 'this', 'that'}

	        # Tokenise on word characters so attached punctuation is dropped;
	        # with a plain split() "However," would never equal 'however' and the
	        # contrast exemption below could not trigger.
	        token_sets = [set(re.findall(r"\w+(?:'\w+)*", s)) for s in sentences]

	        contradictions = 0
	        for wa, wb in zip(token_sets, token_sets[1:]):
	            topic_overlap = (wa & wb) - positive - negative - contrast - filler
	            if len(topic_overlap) < 2:
	                continue
	            pa, na = len(wa & positive), len(wa & negative)
	            pb, nb = len(wb & positive), len(wb & negative)
	            polarity_flips = (pa > na and nb > pb) or (na > pa and pb > nb)
	            if polarity_flips and not (wb & contrast):
	                contradictions += 1

	        if contradictions >= 2:
	            return 0.15
	        if contradictions == 1:
	            return 0.30
	        return 0.55

	    def _detect_topic_coherence(self, text: str) -> float:
	        """Vocabulary overlap between sentences — detect topic drift.

	        Computes the Jaccard overlap of content words for each pair of
	        adjacent sentences. Returns 0.55 when fewer than two usable sentences
	        exist, 0.20 when one pair is near-disjoint while another overlaps,
	        0.25 when the average overlap is very low, and otherwise a value that
	        grows with the average overlap up to 0.85.
	        """
	        sentences = [
	            s.strip()
	            for s in re.split(r'[.!?]+', text)
	            if len(s.strip()) > 5
	        ]
	        if len(sentences) < 2:
	            return 0.55

	        stops = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been',
	                 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will',
	                 'would', 'shall', 'should', 'may', 'might', 'must', 'can',
	                 'could', 'of', 'in', 'to', 'for', 'with', 'on', 'at', 'by',
	                 'from', 'and', 'or', 'but', 'not', 'that', 'this', 'it', 'its',
	                 'as', 'if', 'than', 'so', 'which', 'who', 'what', 'when',
	                 'where', 'how', 'all', 'each', 'every', 'both', 'few', 'more',
	                 'most', 'other', 'some', 'such', 'no', 'only', 'very'}

	        content_words = [set(s.lower().split()) - stops for s in sentences]

	        pairs = []
	        for left, right in zip(content_words, content_words[1:]):
	            # Skip pairs where either sentence has no content words at all.
	            if left and right:
	                pairs.append(len(left & right) / len(left | right))

	        if not pairs:
	            return 0.55
	        avg = sum(pairs) / len(pairs)

	        # One near-disjoint pair next to an overlapping one signals an abrupt
	        # change of subject.
	        if len(pairs) >= 2 and min(pairs) < 0.02 and max(pairs) > 0.08:
	            return 0.20
	        if avg < 0.03:
	            return 0.25
	        return min(0.85, 0.30 + avg * 4)

	    def _detect_causal_logic(self, text: str) -> float:
	        """Structural causal reasoning check.

	        Fabricated-commercial and nonsensical-causality phrases lower the
	        score (0.25 for one hit, 0.10 for two or more); ordinary causal
	        connectives raise it (0.65 / 0.75). Returns 0.55 when nothing matches.
	        Phrases are matched as case-insensitive substrings.
	        """
	        text_lower = text.lower()

	        good = ['because', 'therefore', 'this is why', 'as a result',
	                'which causes', 'leading to', 'due to', 'since',
	                'consequently', 'which means', 'which is why']
	        nonsense = [
	            'directly killing all', 'seek out and destroy every',
	            'decide to change their', 'choose which traits to develop',
	            'within just a few generations, entirely new',
	            'the chemicals are working to eliminate',
	            'this process requires no', 'occurs primarily at night',
	        ]

	        fabricated_commercial = [
	            'currently selling', 'currently available', 'on the market',
	            'already being used', 'can be purchased', 'are now selling',
	            'provides zero-latency', 'zero-latency connections',
	            'will develop telekinetic', 'unlock the remaining',
	            'reverse aging', 'cure any', 'more effective than all',
	            'permanently boost', 'guarantee protection',
	            'can permanently', 'reverse tooth decay',
	        ]

	        g = sum(1 for m in good if m in text_lower)
	        n = sum(1 for m in nonsense if m in text_lower)
	        fab = sum(1 for m in fabricated_commercial if m in text_lower)

	        # Fabrication signals take precedence over good connectives.
	        if fab >= 2:
	            return 0.10
	        if fab >= 1:
	            return 0.25
	        if n >= 2:
	            return 0.10
	        if n >= 1:
	            return 0.25
	        if g >= 2:
	            return 0.75
	        if g >= 1:
	            return 0.65
	        return 0.55

	    def _detect_negation_density(self, text: str) -> float:
	        """
	        Truth states what IS. Fabrication states what ISN'T.
	        High negation density is a fabrication signal.

	        Counts how many distinct negation patterns occur (each at most once)
	        per ten words of text and maps that density to a score between 0.15
	        (dense negation) and 0.65 (no negation).
	        """
	        text_lower = text.lower()
	        n_words = len(text_lower.split())
	        if n_words == 0:
	            return 0.55

	        negation_patterns = [
	            r'\brequires?\s+no\b', r'\bhas\s+no\b', r'\bwith\s+no\b',
	            r'\bis\s+not\b', r'\bare\s+not\b', r'\bwas\s+not\b',
	            r'\bdoes\s+not\b', r'\bdo\s+not\b', r'\bcannot\b',
	            r"\bcan't\b", r"\bdon't\b", r"\bdoesn't\b", r"\bisn't\b",
	            r"\baren't\b", r"\bwasn't\b", r"\bweren't\b", r"\bhasn't\b",
	            r"\bhaven't\b", r"\bwon't\b", r"\bshouldn't\b",
	            r'\bnever\b', r'\bnone\b', r'\bneither\b',
	            r'\bno\s+(evidence|proof|basis|support|reason)\b',
	        ]

	        neg_count = sum(1 for p in negation_patterns if re.search(p, text_lower))
	        # Texts shorter than ten words are treated as one ten-word unit.
	        density = neg_count / max(1, n_words / 10)

	        if density >= 1.5:
	            return 0.15
	        if density >= 1.0:
	            return 0.30
	        if density >= 0.5:
	            return 0.45
	        if density > 0:
	            return 0.55
	        return 0.65

	    def _detect_numerical_plausibility(self, text: str) -> float:
	        """Round-number detection.

	        Every number whose integer part is at least 100 is scored by the
	        share of trailing zeros in that integer part (0.35 very round,
	        0.50 fairly round, 0.70 otherwise); the result is the mean of those
	        scores. Returns 0.55 when the text holds fewer than two numbers or
	        none of them reaches 100.
	        """
	        numbers = re.findall(r'\b(\d+(?:,\d{3})*(?:\.\d+)?)\b', text)
	        if len(numbers) < 2:
	            return 0.55

	        scores = []
	        for raw in numbers:
	            # Work on the digit string itself rather than int(float(...)):
	            # a float conversion overflows on very long digit runs and loses
	            # the real trailing zeros above 2**53.
	            whole = raw.replace(',', '').split('.', 1)[0].lstrip('0')
	            if len(whole) < 3:
	                # Integer part below 100 (including zero): not scored.
	                continue
	            trailing_zeros = len(whole) - len(whole.rstrip('0'))
	            roundness = trailing_zeros / len(whole)
	            if roundness > 0.6:
	                scores.append(0.35)
	            elif roundness > 0.4:
	                scores.append(0.50)
	            else:
	                scores.append(0.70)

	        return sum(scores) / len(scores) if scores else 0.55

	    # ============================================================
	    # LEGACY DIMENSIONS
	    # ============================================================

	    def _calculate_phi_alignment(self, text: str) -> float:
	        """Score how close the text's letter proportions are to the golden ratio.

	        Blends (60/40) the closeness of the consonant-to-vowel ratio to PHI
	        with the closeness of the average whitespace-separated word length
	        to 5.0. Returns 0.3 when the text has no a/e/i/o/u at all.
	        """
	        lowered = text.lower()
	        vowels = sum(1 for c in lowered if c in 'aeiou')
	        consonants = sum(1 for c in lowered if c.isalpha() and c not in 'aeiou')
	        if vowels == 0:
	            return 0.3
	        ratio = consonants / vowels
	        phi_score = 1.0 - min(1.0, abs(ratio - PHI) / PHI)

	        words = text.split()
	        if len(words) >= 2:
	            avg = sum(len(w) for w in words) / len(words)
	            ls = 1.0 - min(1.0, abs(avg - 5.0) / 5.0)
	        else:
	            # Too few words for a meaningful average length.
	            ls = 0.5
	        return phi_score * 0.6 + ls * 0.4

	    def _calculate_semantic_density(self, text: str) -> float:
	        """Score information density from vocabulary variety and word length.

	        Averages the share of unique (case-folded) words with the closeness
	        of the average word length to 5.5. Returns 0.0 for text without words.
	        """
	        words = text.split()
	        if not words:
	            return 0.0
	        ur = len(set(w.lower() for w in words)) / len(words)
	        avg = sum(len(w) for w in words) / len(words)
	        ls = 1.0 - min(1.0, abs(avg - 5.5) / 5.5)
	        return ur * 0.5 + ls * 0.5

	    def _is_alpha_seed(self, text: str) -> bool:
	        """Return True when the SHA-256 digest of *text* is divisible by ALPHA."""
	        digest = hashlib.sha256(text.encode()).hexdigest()
	        return int(digest, 16) % ALPHA == 0


	# Singleton: shared module-level scorer that the score() and analyze()
	# convenience functions below delegate to.
	_coherence = PhiCoherence()

	def score(text: str) -> float:
	    """Return the overall credibility score (0-1) of *text*.

	    Delegates to the shared module-level scorer.
	    """
	    overall = _coherence.calculate(text)
	    return overall

	def analyze(text: str) -> CoherenceMetrics:
	    """Return the complete per-dimension credibility metrics for *text*.

	    Delegates to the shared module-level scorer.
	    """
	    metrics = _coherence.analyze(text)
	    return metrics

	def is_alpha_seed(text: str) -> bool:
	    """Report whether *text* is an α-SEED (SHA-256 digest % 137 == 0)."""
	    digest_hex = hashlib.sha256(text.encode()).hexdigest()
	    digest_value = int(digest_hex, 16)
	    return digest_value % ALPHA == 0