File size: 625 Bytes
6bff5d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""Regex patterns and column-name heuristics for PII detection.

Used by catalog/pii_detector.py at ingestion time. Default policy:
when in doubt, set pii_flag=True. False positives cost nothing; false
negatives leak data.
"""

import re

# Lower-case keywords that mark a column name as likely PII. The set is
# immutable; how entries are compared against real column names (exact vs.
# substring) is up to the consumer and is not defined here.
PII_NAME_PATTERNS = frozenset(
    (
        # Email
        "email",
        # Phone numbers
        "phone",
        "mobile",
        "telp",
        "telephone",
        # Government / identity identifiers
        # NOTE(review): "ktp" and "nik" are presumably Indonesian ID terms — confirm.
        "ssn",
        "tin",
        "passport",
        "ktp",
        "nik",
        # Personal names
        "name",
        "fullname",
        "first_name",
        "last_name",
        "surname",
        # Postal address
        "address",
        "street",
        "zipcode",
        "postal",
        # Date of birth
        "birthdate",
        "dob",
        "birthday",
    )
)

# Email shape, anchored at both ends: one or more word chars / "." / "+" / "-",
# then "@", then one or more word chars or "-", a literal ".", and one or more
# word chars / "." / "-".
_EMAIL_PATTERN = r"^[\w.+-]+@[\w-]+\.[\w.-]+$"

# Phone shape, anchored at both ends: an optional leading "+" followed by at
# least seven characters drawn from digits, whitespace, "-", "(" and ")".
# Only the character set and length are checked, so other digit-and-hyphen
# strings (e.g. "2023-01-01") match as well.
_PHONE_PATTERN = r"^\+?[\d\s\-()]{7,}$"

# Compiled once at import time so consumers can reuse them per value.
EMAIL_REGEX = re.compile(_EMAIL_PATTERN)
PHONE_REGEX = re.compile(_PHONE_PATTERN)