from __future__ import annotations import math import cv2 import numpy as np _LANDMARK_INDICES = [1, 152, 33, 263, 61, 291] _MODEL_POINTS = np.array( [ [0.0, 0.0, 0.0], [0.0, -330.0, -65.0], [-225.0, 170.0, -135.0], [225.0, 170.0, -135.0], [-150.0, -150.0, -125.0], [150.0, -150.0, -125.0], ], dtype=np.float64, ) class HeadPoseEstimator: def __init__(self, max_angle: float = 30.0, roll_weight: float = 0.5): self.max_angle = max_angle self.roll_weight = roll_weight self._camera_matrix = None self._frame_size = None self._dist_coeffs = np.zeros((4, 1), dtype=np.float64) self._cache_key = None self._cache_result = None def _get_camera_matrix(self, frame_w: int, frame_h: int) -> np.ndarray: if self._camera_matrix is not None and self._frame_size == (frame_w, frame_h): return self._camera_matrix focal_length = float(frame_w) cx, cy = frame_w / 2.0, frame_h / 2.0 self._camera_matrix = np.array( [[focal_length, 0, cx], [0, focal_length, cy], [0, 0, 1]], dtype=np.float64, ) self._frame_size = (frame_w, frame_h) return self._camera_matrix def _solve(self, landmarks: np.ndarray, frame_w: int, frame_h: int): key = (landmarks.data.tobytes(), frame_w, frame_h) if self._cache_key == key: return self._cache_result image_points = np.array( [ [landmarks[i, 0] * frame_w, landmarks[i, 1] * frame_h] for i in _LANDMARK_INDICES ], dtype=np.float64, ) camera_matrix = self._get_camera_matrix(frame_w, frame_h) success, rvec, tvec = cv2.solvePnP( _MODEL_POINTS, image_points, camera_matrix, self._dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE, ) result = (success, rvec, tvec, image_points) self._cache_key = key self._cache_result = result return result def estimate( self, landmarks: np.ndarray, frame_w: int, frame_h: int ) -> tuple[float, float, float] | None: success, rvec, tvec, _ = self._solve(landmarks, frame_w, frame_h) if not success: return None rmat, _ = cv2.Rodrigues(rvec) nose_dir = rmat @ np.array([0.0, 0.0, 1.0]) face_up = rmat @ np.array([0.0, 1.0, 0.0]) yaw = math.degrees(math.atan2(nose_dir[0], -nose_dir[2])) pitch = math.degrees(math.asin(np.clip(-nose_dir[1], -1.0, 1.0))) roll = math.degrees(math.atan2(face_up[0], -face_up[1])) return (yaw, pitch, roll) def score(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> float: angles = self.estimate(landmarks, frame_w, frame_h) if angles is None: return 0.0 yaw, pitch, roll = angles deviation = math.sqrt(yaw**2 + pitch**2 + (self.roll_weight * roll) ** 2) t = min(deviation / self.max_angle, 1.0) return 0.5 * (1.0 + math.cos(math.pi * t)) def draw_axes( self, frame: np.ndarray, landmarks: np.ndarray, axis_length: float = 50.0, ) -> np.ndarray: h, w = frame.shape[:2] success, rvec, tvec, image_points = self._solve(landmarks, w, h) if not success: return frame camera_matrix = self._get_camera_matrix(w, h) nose = tuple(image_points[0].astype(int)) axes_3d = np.float64( [[axis_length, 0, 0], [0, axis_length, 0], [0, 0, axis_length]] ) projected, _ = cv2.projectPoints( axes_3d, rvec, tvec, camera_matrix, self._dist_coeffs ) colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0)] for i, color in enumerate(colors): pt = tuple(projected[i].ravel().astype(int)) cv2.line(frame, nose, pt, color, 2) return frame