File size: 2,249 Bytes
c86c45b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import cv2
import numpy as np

from models.face_mesh import FaceMeshDetector

# Aliases for the face-mesh landmark index lists that outline each eye.
LEFT_EYE_CONTOUR = FaceMeshDetector.LEFT_EYE_INDICES
RIGHT_EYE_CONTOUR = FaceMeshDetector.RIGHT_EYE_INDICES

# Standard ImageNet channel normalization constants (RGB order),
# applied in crop_to_tensor below.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# Default side length (pixels) of the square eye crops.
CROP_SIZE = 96


def _bbox_from_landmarks(
    landmarks: np.ndarray,
    indices: list[int],
    frame_w: int,
    frame_h: int,
    expand: float = 0.4,
) -> tuple[int, int, int, int]:
    pts = landmarks[indices, :2]
    px = pts[:, 0] * frame_w
    py = pts[:, 1] * frame_h

    x_min, x_max = px.min(), px.max()
    y_min, y_max = py.min(), py.max()
    w = x_max - x_min
    h = y_max - y_min
    cx = (x_min + x_max) / 2
    cy = (y_min + y_max) / 2

    size = max(w, h) * (1 + expand)
    half = size / 2

    x1 = int(max(cx - half, 0))
    y1 = int(max(cy - half, 0))
    x2 = int(min(cx + half, frame_w))
    y2 = int(min(cy + half, frame_h))

    return x1, y1, x2, y2


def _crop_and_resize(
    frame: np.ndarray,
    bbox: tuple[int, int, int, int],
    crop_size: int,
) -> np.ndarray:
    """Slice bbox (x1, y1, x2, y2) out of frame and resize to a square.

    If the box is degenerate (e.g. the eye region fell entirely outside
    the frame and was clamped to zero area), return a black placeholder
    so downstream shapes stay fixed.
    """
    x1, y1, x2, y2 = bbox
    crop = frame[y1:y2, x1:x2]
    if crop.size == 0:
        # NOTE(review): assumes a 3-channel uint8 frame; the placeholder
        # dtype/channels will not match other frame formats — confirm.
        return np.zeros((crop_size, crop_size, 3), dtype=np.uint8)
    return cv2.resize(crop, (crop_size, crop_size), interpolation=cv2.INTER_AREA)


def extract_eye_crops(
    frame: np.ndarray,
    landmarks: np.ndarray,
    expand: float = 0.4,
    crop_size: int = CROP_SIZE,
) -> tuple[np.ndarray, np.ndarray, tuple, tuple]:
    """Extract resized left and right eye crops from a frame.

    Args:
        frame: Source image (height x width x channels).
        landmarks: Face-mesh landmarks; x/y are scaled by the frame size.
        expand: Fractional margin added around each eye's landmark extent.
        crop_size: Side length of the square output crops.

    Returns:
        (left_crop, right_crop, left_bbox, right_bbox) where each crop is
        crop_size x crop_size and each bbox is (x1, y1, x2, y2) in pixels.
    """
    h, w = frame.shape[:2]

    left_bbox = _bbox_from_landmarks(landmarks, LEFT_EYE_CONTOUR, w, h, expand)
    right_bbox = _bbox_from_landmarks(landmarks, RIGHT_EYE_CONTOUR, w, h, expand)

    # Identical crop/fallback/resize handling for both eyes lives in one
    # helper instead of being duplicated inline.
    left_crop = _crop_and_resize(frame, left_bbox, crop_size)
    right_crop = _crop_and_resize(frame, right_bbox, crop_size)

    return left_crop, right_crop, left_bbox, right_bbox


def crop_to_tensor(crop_bgr: np.ndarray) -> "torch.Tensor":
    """Convert a BGR eye crop to a normalized CHW float32 tensor.

    The crop is converted to RGB, scaled to [0, 1], normalized with the
    ImageNet channel mean/std, and transposed to (C, H, W).
    """
    # Local import: torch is only required when tensors are produced.
    import torch

    rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    # Vectorized normalization — the (3,) mean/std broadcast over H and W,
    # replacing the per-channel Python loop with one NumPy pass.
    mean = np.asarray(IMAGENET_MEAN, dtype=np.float32)
    std = np.asarray(IMAGENET_STD, dtype=np.float32)
    rgb = (rgb - mean) / std
    return torch.from_numpy(rgb.transpose(2, 0, 1))