Spaces:

FocusGuard
/

FocusGuardBaseModel

Sleeping

File size: 3,440 Bytes

c86c45b

import os
import time
from pathlib import Path
from urllib.request import urlretrieve

import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks.python.vision import FaceLandmarkerOptions, FaceLandmarker, RunningMode
from mediapipe.tasks import python as mp_tasks

# Remote location the face-landmarker model bundle is fetched from when it
# is not already present in the local cache (see _ensure_model).
_MODEL_URL = (
    "https://storage.googleapis.com"
    "/mediapipe-models/face_landmarker/face_landmarker"
    "/float16/latest/face_landmarker.task"
)


def _ensure_model() -> str:
    """Return the local path of the face-landmarker model, downloading it once.

    The cache directory is taken from the ``FOCUSGUARD_CACHE_DIR`` environment
    variable when set, otherwise ``~/.cache/focusguard``. If the model file is
    already present its path is returned immediately; otherwise the directory
    is created and the file is fetched from ``_MODEL_URL``.

    The download is written to a temporary sibling file and only moved onto
    the final name once it has completed, so an interrupted or failed download
    never leaves a truncated file that later calls would mistake for a valid
    cached model.

    Returns:
        The model file path as a string.

    Raises:
        OSError: If the cache directory cannot be created or the download
            fails (``urllib.error.URLError`` is an ``OSError`` subclass).
    """
    override = os.environ.get("FOCUSGUARD_CACHE_DIR")
    # Only resolve the home directory when it is actually needed; Path.home()
    # can raise when no home directory can be determined.
    if override is not None:
        cache_dir = Path(override)
    else:
        cache_dir = Path.home() / ".cache" / "focusguard"

    model_path = cache_dir / "face_landmarker.task"
    if model_path.exists():
        return str(model_path)

    cache_dir.mkdir(parents=True, exist_ok=True)
    print(f"[FACE_MESH] Downloading model to {model_path}...")

    # Per-process temp name in the same directory, so os.replace stays on one
    # filesystem and concurrent downloaders do not write to the same file.
    tmp_path = model_path.with_name(f"{model_path.name}.{os.getpid()}.part")
    try:
        urlretrieve(_MODEL_URL, tmp_path)
        os.replace(tmp_path, model_path)
    finally:
        # No-op after a successful replace; removes the partial file otherwise.
        tmp_path.unlink(missing_ok=True)

    print("[FACE_MESH] Download complete.")
    return str(model_path)


class FaceMeshDetector:
    LEFT_EYE_INDICES = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246]
    RIGHT_EYE_INDICES = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398]
    LEFT_IRIS_INDICES = [468, 469, 470, 471, 472]
    RIGHT_IRIS_INDICES = [473, 474, 475, 476, 477]

    def __init__(
        self,
        max_num_faces: int = 1,
        min_detection_confidence: float = 0.5,
        min_tracking_confidence: float = 0.5,
    ):
        model_path = _ensure_model()
        options = FaceLandmarkerOptions(
            base_options=mp_tasks.BaseOptions(model_asset_path=model_path),
            num_faces=max_num_faces,
            min_face_detection_confidence=min_detection_confidence,
            min_face_presence_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
            running_mode=RunningMode.VIDEO,
        )
        self._landmarker = FaceLandmarker.create_from_options(options)
        self._t0 = time.monotonic()
        self._last_ts = 0

    def process(self, bgr_frame: np.ndarray) -> np.ndarray | None:
        # BGR in -> (478,3) norm x,y,z or None
        rgb = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
        ts = max(int((time.monotonic() - self._t0) * 1000), self._last_ts + 1)
        self._last_ts = ts
        result = self._landmarker.detect_for_video(mp_image, ts)

        if not result.face_landmarks:
            return None

        face = result.face_landmarks[0]
        return np.array([(lm.x, lm.y, lm.z) for lm in face], dtype=np.float32)

    def get_pixel_landmarks(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> np.ndarray:
        # norm -> pixel (x,y)
        pixel = np.zeros((landmarks.shape[0], 2), dtype=np.int32)
        pixel[:, 0] = (landmarks[:, 0] * frame_w).astype(np.int32)
        pixel[:, 1] = (landmarks[:, 1] * frame_h).astype(np.int32)
        return pixel

    def get_3d_landmarks(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> np.ndarray:
        # norm -> pixel-scale x,y,z (z scaled by width)
        pts = np.zeros_like(landmarks)
        pts[:, 0] = landmarks[:, 0] * frame_w
        pts[:, 1] = landmarks[:, 1] * frame_h
        pts[:, 2] = landmarks[:, 2] * frame_w
        return pts

    def close(self):
        self._landmarker.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()