"""Regex patterns and column-name heuristics for PII detection.

Used by catalog/pii_detector.py at ingestion time. Default policy:
when in doubt, set pii_flag=True. False positives cost nothing; false
negatives leak data.
"""
| import re | |
# Lower-case column-name fragments that mark a column as likely PII.
#
# Every multi-word entry is listed in both its snake_case and its
# run-together spelling ("full_name" / "fullname"), so detection does not
# depend on which separator convention a source schema uses. A redundant
# entry is harmless; a missing one is a potential false negative.
# NOTE(review): how these are compared to column names (exact, token or
# substring match) is decided by the caller — confirm there.
PII_NAME_PATTERNS = frozenset({
    # Contact details.
    "email",
    "phone", "mobile", "telp", "telephone",
    # Government / identity numbers.
    "ssn", "tin", "passport", "ktp", "nik",
    # Personal names.
    "name", "fullname", "full_name",
    "first_name", "firstname", "last_name", "lastname", "surname",
    # Postal address.
    "address", "street", "zipcode", "zip_code", "postal",
    # Date of birth.
    "birthdate", "birth_date", "dob", "birthday",
})
# Whole-string match for an e-mail address: local part, "@", then a domain
# containing at least one dot. The local part accepts word characters plus
# the RFC 5322 "atext" punctuation, so addresses such as
# "o'brien@example.com" are not missed. This is intentionally a loose
# shape check, not a validator.
EMAIL_REGEX = re.compile(r"^[\w.!#$%&'*+/=?^`{|}~-]+@[\w-]+\.[\w.-]+$")

# Whole-string match for a phone-like value: an optional leading "+"
# followed by at least seven characters drawn from digits, whitespace,
# hyphens and parentheses. Deliberately permissive.
PHONE_REGEX = re.compile(r"^\+?[\d\s\-()]{7,}$")