# FocusGuardBaseModel — models/eye_crop.py
# Deployed base model (commit c86c45b, verified), uploaded by Kexin-251202.
import cv2
import numpy as np
from models.face_mesh import FaceMeshDetector
# Landmark-index lists outlining each eye contour, re-exported from the
# project's face-mesh detector so croppers don't import it directly.
LEFT_EYE_CONTOUR = FaceMeshDetector.LEFT_EYE_INDICES
RIGHT_EYE_CONTOUR = FaceMeshDetector.RIGHT_EYE_INDICES
# Per-channel RGB normalization statistics used by ImageNet-pretrained backbones.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)
# Side length (pixels) of the square eye crop fed to the model.
CROP_SIZE = 96
def _bbox_from_landmarks(
landmarks: np.ndarray,
indices: list[int],
frame_w: int,
frame_h: int,
expand: float = 0.4,
) -> tuple[int, int, int, int]:
pts = landmarks[indices, :2]
px = pts[:, 0] * frame_w
py = pts[:, 1] * frame_h
x_min, x_max = px.min(), px.max()
y_min, y_max = py.min(), py.max()
w = x_max - x_min
h = y_max - y_min
cx = (x_min + x_max) / 2
cy = (y_min + y_max) / 2
size = max(w, h) * (1 + expand)
half = size / 2
x1 = int(max(cx - half, 0))
y1 = int(max(cy - half, 0))
x2 = int(min(cx + half, frame_w))
y2 = int(min(cy + half, frame_h))
return x1, y1, x2, y2
def _fit_eye_patch(patch: np.ndarray, crop_size: int) -> np.ndarray:
    """Resize *patch* to a crop_size x crop_size square, or return a blank crop.

    An empty patch means the eye bbox was degenerate (eye outside the frame);
    callers downstream still expect a fixed-size image, so emit black pixels.
    """
    if patch.size == 0:
        return np.zeros((crop_size, crop_size, 3), dtype=np.uint8)
    # INTER_AREA is the preferred interpolation when shrinking an image.
    return cv2.resize(patch, (crop_size, crop_size), interpolation=cv2.INTER_AREA)


def extract_eye_crops(
    frame: np.ndarray,
    landmarks: np.ndarray,
    expand: float = 0.4,
    crop_size: int = CROP_SIZE,
) -> tuple[np.ndarray, np.ndarray, tuple, tuple]:
    """Extract fixed-size left/right eye crops from a video frame.

    Args:
        frame: HxWx3 image (presumably OpenCV BGR order — confirm at call site).
        landmarks: face-mesh landmarks; columns 0-1 hold normalized (x, y).
        expand: fractional margin added around each tight eye bbox.
        crop_size: side length (pixels) of each returned square crop.

    Returns:
        (left_crop, right_crop, left_bbox, right_bbox) where each crop is a
        crop_size x crop_size x 3 array and each bbox is (x1, y1, x2, y2)
        in pixel coordinates.
    """
    h, w = frame.shape[:2]
    left_bbox = _bbox_from_landmarks(landmarks, LEFT_EYE_CONTOUR, w, h, expand)
    right_bbox = _bbox_from_landmarks(landmarks, RIGHT_EYE_CONTOUR, w, h, expand)
    # Shared resize-or-blank path for both eyes (was duplicated inline).
    x1, y1, x2, y2 = left_bbox
    left_crop = _fit_eye_patch(frame[y1:y2, x1:x2], crop_size)
    x1, y1, x2, y2 = right_bbox
    right_crop = _fit_eye_patch(frame[y1:y2, x1:x2], crop_size)
    return left_crop, right_crop, left_bbox, right_bbox
def crop_to_tensor(crop_bgr: np.ndarray):
import torch
rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
for c in range(3):
rgb[:, :, c] = (rgb[:, :, c] - IMAGENET_MEAN[c]) / IMAGENET_STD[c]
return torch.from_numpy(rgb.transpose(2, 0, 1))