"""Regex patterns and column-name heuristics for PII detection.

Used by catalog/pii_detector.py at ingestion time.

Default policy: when in doubt, set pii_flag=True. False positives cost
nothing; false negatives leak data.
"""

import re

# Lower-case tokens that mark a column name as likely PII.
# NOTE(review): how the detector compares a column name against these tokens
# (exact match vs. substring) is decided in catalog/pii_detector.py, not here.
PII_NAME_PATTERNS = frozenset({
    # Contact details ("telp" looks like a local abbreviation of
    # "telephone" -- confirm).
    "email",
    "phone",
    "mobile",
    "telp",
    "telephone",
    # Government-issued identifiers ("ktp" and "nik" look like Indonesian
    # national-ID terms -- confirm).
    "ssn",
    "tin",
    "passport",
    "ktp",
    "nik",
    # Personal names.
    "name",
    "fullname",
    "first_name",
    "last_name",
    "surname",
    # Postal address parts.
    "address",
    "street",
    "zipcode",
    "postal",
    # Date of birth.
    "birthdate",
    "dob",
    "birthday",
})

# Whole-value match for something shaped like local@domain.tld.
# Leading/trailing whitespace is tolerated so that padded values (for example
# fixed-width columns) are still recognised; under the module policy a missed
# e-mail is worse than a spurious hit. Every value the unpadded pattern
# accepted is still accepted.
EMAIL_REGEX = re.compile(r"^\s*[\w.+-]+@[\w-]+\.[\w.-]+\s*$")

# Whole-value match for an optional "+" followed by at least seven characters
# drawn from digits, whitespace, hyphens and parentheses. The pattern is
# deliberately permissive: it does not require any digit to be present.
# The optional prefix is spelled "(?:\s*\+)?" rather than "\s*\+?" so that
# whitespace before a leading "+" is accepted without letting two adjacent
# whitespace-matching parts backtrack against each other on long inputs.
PHONE_REGEX = re.compile(r"^(?:\s*\+)?[\d\s\-()]{7,}$")