# IntegrationTest/models/face_mesh.py
# (uploaded by Yingtao-Zheng, commit 8bbb872 — "Upload partially updated files")
import os
import time
from pathlib import Path
from urllib.request import urlretrieve
import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks.python.vision import FaceLandmarkerOptions, FaceLandmarker, RunningMode
from mediapipe.tasks import python as mp_tasks
# Google-hosted MediaPipe FaceLandmarker model bundle (float16 weights,
# "latest" release channel). Downloaded once by _ensure_model() and cached.
_MODEL_URL = (
    "https://storage.googleapis.com/mediapipe-models/face_landmarker/"
    "face_landmarker/float16/latest/face_landmarker.task"
)
def _ensure_model() -> str:
    """Return the local path to the FaceLandmarker model, downloading on first use.

    The cache directory defaults to ``~/.cache/focusguard`` and can be
    overridden with the ``FOCUSGUARD_CACHE_DIR`` environment variable.

    Returns:
        Absolute path (as ``str``) of the cached ``face_landmarker.task`` file.

    Raises:
        URLError / OSError: if the download or file operations fail.
    """
    cache_dir = Path(os.environ.get(
        "FOCUSGUARD_CACHE_DIR",
        Path.home() / ".cache" / "focusguard",
    ))
    model_path = cache_dir / "face_landmarker.task"
    if model_path.exists():
        return str(model_path)
    cache_dir.mkdir(parents=True, exist_ok=True)
    print(f"[FACE_MESH] Downloading model to {model_path}...")
    # Download to a temporary sibling file first so an interrupted transfer
    # can never leave a truncated model at the final path (which exists() would
    # otherwise treat as a valid cache hit on every subsequent run).
    tmp_path = model_path.with_name(model_path.name + ".part")
    try:
        urlretrieve(_MODEL_URL, str(tmp_path))
        # Atomic within the same directory, so readers see all-or-nothing.
        os.replace(tmp_path, model_path)
    except BaseException:
        # Best-effort cleanup of the partial download, then re-raise.
        tmp_path.unlink(missing_ok=True)
        raise
    print("[FACE_MESH] Download complete.")
    return str(model_path)
class FaceMeshDetector:
LEFT_EYE_INDICES = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246]
RIGHT_EYE_INDICES = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398]
LEFT_IRIS_INDICES = [468, 469, 470, 471, 472]
RIGHT_IRIS_INDICES = [473, 474, 475, 476, 477]
def __init__(
self,
max_num_faces: int = 1,
min_detection_confidence: float = 0.5,
min_tracking_confidence: float = 0.5,
):
model_path = _ensure_model()
options = FaceLandmarkerOptions(
base_options=mp_tasks.BaseOptions(model_asset_path=model_path),
num_faces=max_num_faces,
min_face_detection_confidence=min_detection_confidence,
min_face_presence_confidence=min_detection_confidence,
min_tracking_confidence=min_tracking_confidence,
running_mode=RunningMode.VIDEO,
)
self._landmarker = FaceLandmarker.create_from_options(options)
self._t0 = time.monotonic()
self._last_ts = 0
def process(self, bgr_frame: np.ndarray) -> np.ndarray | None:
# BGR in -> (478,3) norm x,y,z or None
rgb = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
ts = max(int((time.monotonic() - self._t0) * 1000), self._last_ts + 1)
self._last_ts = ts
result = self._landmarker.detect_for_video(mp_image, ts)
if not result.face_landmarks:
return None
face = result.face_landmarks[0]
return np.array([(lm.x, lm.y, lm.z) for lm in face], dtype=np.float32)
def get_pixel_landmarks(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> np.ndarray:
# norm -> pixel (x,y)
pixel = np.zeros((landmarks.shape[0], 2), dtype=np.int32)
pixel[:, 0] = (landmarks[:, 0] * frame_w).astype(np.int32)
pixel[:, 1] = (landmarks[:, 1] * frame_h).astype(np.int32)
return pixel
def get_3d_landmarks(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> np.ndarray:
# norm -> pixel-scale x,y,z (z scaled by width)
pts = np.zeros_like(landmarks)
pts[:, 0] = landmarks[:, 0] * frame_w
pts[:, 1] = landmarks[:, 1] * frame_h
pts[:, 2] = landmarks[:, 2] * frame_w
return pts
def close(self):
self._landmarker.close()
def __enter__(self):
return self
def __exit__(self, *args):
self.close()