# FocusGuard face-mesh detector: wrapper around MediaPipe's FaceLandmarker task.
| import os | |
| import time | |
| from pathlib import Path | |
| from urllib.request import urlretrieve | |
| import cv2 | |
| import numpy as np | |
| import mediapipe as mp | |
| from mediapipe.tasks.python.vision import FaceLandmarkerOptions, FaceLandmarker, RunningMode | |
| from mediapipe.tasks import python as mp_tasks | |
# Google-hosted MediaPipe FaceLandmarker model bundle (float16 variant).
# Fetched once by _ensure_model() and cached on disk thereafter.
_MODEL_URL = (
    "https://storage.googleapis.com/mediapipe-models/face_landmarker/"
    "face_landmarker/float16/latest/face_landmarker.task"
)
def _ensure_model() -> str:
    """Return the local path to the FaceLandmarker model, downloading on first use.

    The cache directory defaults to ``~/.cache/focusguard`` and can be
    overridden with the ``FOCUSGUARD_CACHE_DIR`` environment variable.

    Returns:
        Path to the cached ``face_landmarker.task`` file, as a string.
    """
    cache_dir = Path(os.environ.get(
        "FOCUSGUARD_CACHE_DIR",
        Path.home() / ".cache" / "focusguard",
    ))
    model_path = cache_dir / "face_landmarker.task"
    if model_path.exists():
        return str(model_path)
    cache_dir.mkdir(parents=True, exist_ok=True)
    print(f"[FACE_MESH] Downloading model to {model_path}...")
    # Download to a temporary name and atomically rename into place, so an
    # interrupted download can never leave a truncated file that the
    # exists() check above would mistake for a valid model on the next run.
    tmp_path = model_path.with_suffix(".task.part")
    try:
        urlretrieve(_MODEL_URL, tmp_path)
    except BaseException:
        tmp_path.unlink(missing_ok=True)
        raise
    os.replace(tmp_path, model_path)
    print("[FACE_MESH] Download complete.")
    return str(model_path)
| class FaceMeshDetector: | |
| LEFT_EYE_INDICES = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246] | |
| RIGHT_EYE_INDICES = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398] | |
| LEFT_IRIS_INDICES = [468, 469, 470, 471, 472] | |
| RIGHT_IRIS_INDICES = [473, 474, 475, 476, 477] | |
| def __init__( | |
| self, | |
| max_num_faces: int = 1, | |
| min_detection_confidence: float = 0.5, | |
| min_tracking_confidence: float = 0.5, | |
| ): | |
| model_path = _ensure_model() | |
| options = FaceLandmarkerOptions( | |
| base_options=mp_tasks.BaseOptions(model_asset_path=model_path), | |
| num_faces=max_num_faces, | |
| min_face_detection_confidence=min_detection_confidence, | |
| min_face_presence_confidence=min_detection_confidence, | |
| min_tracking_confidence=min_tracking_confidence, | |
| running_mode=RunningMode.VIDEO, | |
| ) | |
| self._landmarker = FaceLandmarker.create_from_options(options) | |
| self._t0 = time.monotonic() | |
| self._last_ts = 0 | |
| def process(self, bgr_frame: np.ndarray) -> np.ndarray | None: | |
| # BGR in -> (478,3) norm x,y,z or None | |
| rgb = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB) | |
| mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb) | |
| ts = max(int((time.monotonic() - self._t0) * 1000), self._last_ts + 1) | |
| self._last_ts = ts | |
| result = self._landmarker.detect_for_video(mp_image, ts) | |
| if not result.face_landmarks: | |
| return None | |
| face = result.face_landmarks[0] | |
| return np.array([(lm.x, lm.y, lm.z) for lm in face], dtype=np.float32) | |
| def get_pixel_landmarks(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> np.ndarray: | |
| # norm -> pixel (x,y) | |
| pixel = np.zeros((landmarks.shape[0], 2), dtype=np.int32) | |
| pixel[:, 0] = (landmarks[:, 0] * frame_w).astype(np.int32) | |
| pixel[:, 1] = (landmarks[:, 1] * frame_h).astype(np.int32) | |
| return pixel | |
| def get_3d_landmarks(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> np.ndarray: | |
| # norm -> pixel-scale x,y,z (z scaled by width) | |
| pts = np.zeros_like(landmarks) | |
| pts[:, 0] = landmarks[:, 0] * frame_w | |
| pts[:, 1] = landmarks[:, 1] * frame_h | |
| pts[:, 2] = landmarks[:, 2] * frame_w | |
| return pts | |
| def close(self): | |
| self._landmarker.close() | |
| def __enter__(self): | |
| return self | |
| def __exit__(self, *args): | |
| self.close() | |