import cv2
import numpy as np

from models.face_mesh import FaceMeshDetector

# Landmark index sets outlining each eye, as defined by the face-mesh model.
LEFT_EYE_CONTOUR = FaceMeshDetector.LEFT_EYE_INDICES
RIGHT_EYE_CONTOUR = FaceMeshDetector.RIGHT_EYE_INDICES

# Standard ImageNet channel statistics used to normalize crops for the model.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# Side length (pixels) of the square eye crop fed to the network.
CROP_SIZE = 96


def _bbox_from_landmarks(
    landmarks: np.ndarray,
    indices: list[int],
    frame_w: int,
    frame_h: int,
    expand: float = 0.4,
) -> tuple[int, int, int, int]:
    """Compute a square pixel bounding box around a subset of landmarks.

    Args:
        landmarks: Array of normalized [0, 1] coordinates, shape (N, >=2).
        indices: Row indices of the landmarks to bound.
        frame_w: Frame width in pixels.
        frame_h: Frame height in pixels.
        expand: Fractional padding added around the tight box.

    Returns:
        (x1, y1, x2, y2) integer pixel corners, clamped to the frame.
    """
    pts = landmarks[indices, :2]
    px = pts[:, 0] * frame_w
    py = pts[:, 1] * frame_h
    x_min, x_max = px.min(), px.max()
    y_min, y_max = py.min(), py.max()
    cx = (x_min + x_max) / 2
    cy = (y_min + y_max) / 2
    # Square box: take the larger side so the padded crop stays square.
    size = max(x_max - x_min, y_max - y_min) * (1 + expand)
    half = size / 2
    x1 = int(max(cx - half, 0))
    y1 = int(max(cy - half, 0))
    x2 = int(min(cx + half, frame_w))
    y2 = int(min(cy + half, frame_h))
    return x1, y1, x2, y2


def _resized_crop(
    frame: np.ndarray,
    bbox: tuple[int, int, int, int],
    crop_size: int,
) -> np.ndarray:
    """Slice *bbox* out of *frame* and resize it to (crop_size, crop_size).

    Returns an all-black crop when the box is degenerate (zero area), e.g.
    when landmarks fall outside the frame and the clamped box collapses.
    """
    x1, y1, x2, y2 = bbox
    crop = frame[y1:y2, x1:x2]
    if crop.size == 0:
        return np.zeros((crop_size, crop_size, 3), dtype=np.uint8)
    return cv2.resize(crop, (crop_size, crop_size), interpolation=cv2.INTER_AREA)


def extract_eye_crops(
    frame: np.ndarray,
    landmarks: np.ndarray,
    expand: float = 0.4,
    crop_size: int = CROP_SIZE,
) -> tuple[np.ndarray, np.ndarray, tuple, tuple]:
    """Extract square left- and right-eye crops from a BGR frame.

    Args:
        frame: (H, W, 3) BGR image.
        landmarks: Normalized face-mesh landmarks, shape (N, >=2).
        expand: Fractional padding around each eye's tight bounding box.
        crop_size: Output side length of each square crop.

    Returns:
        (left_crop, right_crop, left_bbox, right_bbox) where each crop is a
        (crop_size, crop_size, 3) uint8 image and each bbox is
        (x1, y1, x2, y2) in frame pixel coordinates.
    """
    h, w = frame.shape[:2]
    left_bbox = _bbox_from_landmarks(landmarks, LEFT_EYE_CONTOUR, w, h, expand)
    right_bbox = _bbox_from_landmarks(landmarks, RIGHT_EYE_CONTOUR, w, h, expand)
    # Shared helper removes the duplicated crop/blank/resize logic that the
    # original repeated for each eye.
    left_crop = _resized_crop(frame, left_bbox, crop_size)
    right_crop = _resized_crop(frame, right_bbox, crop_size)
    return left_crop, right_crop, left_bbox, right_bbox


def crop_to_tensor(crop_bgr: np.ndarray):
    """Convert a BGR uint8 eye crop to a normalized CHW float32 torch tensor.

    Applies per-channel ImageNet mean/std normalization after scaling to
    [0, 1]. Returns a tensor of shape (3, H, W).
    """
    # Local import keeps torch optional for callers that only need the
    # OpenCV/NumPy preprocessing helpers above.
    import torch

    rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    # Single broadcast over (H, W, 3) replaces the original per-channel loop;
    # float32 operands keep the array dtype float32.
    mean = np.asarray(IMAGENET_MEAN, dtype=np.float32)
    std = np.asarray(IMAGENET_STD, dtype=np.float32)
    rgb = (rgb - mean) / std
    return torch.from_numpy(rgb.transpose(2, 0, 1))