"""Face-mesh landmark detection built on MediaPipe's FaceLandmarker task."""

import os
import time
from pathlib import Path
from urllib.request import urlretrieve

import cv2
import numpy as np

import mediapipe as mp
from mediapipe.tasks import python as mp_tasks
from mediapipe.tasks.python.vision import (
    FaceLandmarker,
    FaceLandmarkerOptions,
    RunningMode,
)

# Canonical download URL for the float16 face_landmarker model bundle.
_MODEL_URL = (
    "https://storage.googleapis.com/mediapipe-models/face_landmarker/"
    "face_landmarker/float16/latest/face_landmarker.task"
)


def _ensure_model() -> str:
    """Return a local path to the face_landmarker model, downloading on first use.

    The model is cached under ``$FOCUSGUARD_CACHE_DIR`` (default
    ``~/.cache/focusguard``).  The download is written to a temporary
    sibling file and atomically renamed into place, so an interrupted
    download can never leave a truncated file that later runs would
    mistake for a valid cached model.
    """
    cache_dir = Path(os.environ.get(
        "FOCUSGUARD_CACHE_DIR",
        Path.home() / ".cache" / "focusguard",
    ))
    model_path = cache_dir / "face_landmarker.task"
    if model_path.exists():
        return str(model_path)
    cache_dir.mkdir(parents=True, exist_ok=True)
    print(f"[FACE_MESH] Downloading model to {model_path}...")
    # Download to a temp path first: a partial download (network drop,
    # Ctrl-C) must not poison the cache with a truncated model file.
    tmp_path = model_path.with_suffix(".task.tmp")
    try:
        urlretrieve(_MODEL_URL, tmp_path)
    except BaseException:
        tmp_path.unlink(missing_ok=True)
        raise
    os.replace(tmp_path, model_path)  # atomic on POSIX and Windows
    print("[FACE_MESH] Download complete.")
    return str(model_path)


class FaceMeshDetector:
    """Detects 478 face landmarks (mesh + irises) in a video stream.

    Wraps MediaPipe's FaceLandmarker in VIDEO running mode.  Use as a
    context manager, or call :meth:`close` explicitly to release the
    underlying native resources.
    """

    # MediaPipe canonical landmark indices for the eye contours and irises.
    LEFT_EYE_INDICES = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173,
                        157, 158, 159, 160, 161, 246]
    RIGHT_EYE_INDICES = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466,
                         388, 387, 386, 385, 384, 398]
    LEFT_IRIS_INDICES = [468, 469, 470, 471, 472]
    RIGHT_IRIS_INDICES = [473, 474, 475, 476, 477]

    def __init__(
        self,
        max_num_faces: int = 1,
        min_detection_confidence: float = 0.5,
        min_tracking_confidence: float = 0.5,
    ):
        """Create the detector, downloading the model on first use.

        Args:
            max_num_faces: Maximum number of faces to track per frame.
            min_detection_confidence: Threshold applied to both face
                detection and face presence.
            min_tracking_confidence: Threshold for landmark tracking.
        """
        model_path = _ensure_model()
        options = FaceLandmarkerOptions(
            base_options=mp_tasks.BaseOptions(model_asset_path=model_path),
            num_faces=max_num_faces,
            min_face_detection_confidence=min_detection_confidence,
            min_face_presence_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
            running_mode=RunningMode.VIDEO,
        )
        self._landmarker = FaceLandmarker.create_from_options(options)
        # VIDEO mode requires strictly increasing millisecond timestamps;
        # _t0/_last_ts implement that clock.
        self._t0 = time.monotonic()
        self._last_ts = 0

    def process(self, bgr_frame: np.ndarray) -> np.ndarray | None:
        """Detect face landmarks in a single BGR frame.

        Args:
            bgr_frame: Frame in OpenCV BGR channel order.

        Returns:
            A ``(478, 3)`` float32 array of normalized ``(x, y, z)``
            coordinates for the first detected face, or ``None`` when no
            face is found.
        """
        rgb = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
        # max() guards against two frames landing in the same millisecond:
        # detect_for_video rejects non-increasing timestamps.
        ts = max(int((time.monotonic() - self._t0) * 1000), self._last_ts + 1)
        self._last_ts = ts
        result = self._landmarker.detect_for_video(mp_image, ts)
        if not result.face_landmarks:
            return None
        face = result.face_landmarks[0]
        return np.array([(lm.x, lm.y, lm.z) for lm in face], dtype=np.float32)

    def get_pixel_landmarks(self, landmarks: np.ndarray,
                            frame_w: int, frame_h: int) -> np.ndarray:
        """Convert normalized landmarks to integer pixel (x, y) coordinates."""
        pixel = np.zeros((landmarks.shape[0], 2), dtype=np.int32)
        pixel[:, 0] = (landmarks[:, 0] * frame_w).astype(np.int32)
        pixel[:, 1] = (landmarks[:, 1] * frame_h).astype(np.int32)
        return pixel

    def get_3d_landmarks(self, landmarks: np.ndarray,
                         frame_w: int, frame_h: int) -> np.ndarray:
        """Convert normalized landmarks to pixel-scale 3-D points.

        z is scaled by the frame width, matching MediaPipe's convention
        that z shares the x-axis scale.
        """
        pts = np.zeros_like(landmarks)
        pts[:, 0] = landmarks[:, 0] * frame_w
        pts[:, 1] = landmarks[:, 1] * frame_h
        pts[:, 2] = landmarks[:, 2] * frame_w
        return pts

    def close(self):
        """Release the underlying FaceLandmarker's native resources."""
        self._landmarker.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()