Spaces:
Running
Running
MERGE EVERYTHING from feature/integration-cleaned :((((((((((((((((((((((((((((((((((((((((((((((( so tired
7053a3a | from __future__ import annotations | |
| import math | |
| import cv2 | |
| import numpy as np | |
| _LANDMARK_INDICES = [1, 152, 33, 263, 61, 291] | |
| _MODEL_POINTS = np.array( | |
| [ | |
| [0.0, 0.0, 0.0], | |
| [0.0, -330.0, -65.0], | |
| [-225.0, 170.0, -135.0], | |
| [225.0, 170.0, -135.0], | |
| [-150.0, -150.0, -125.0], | |
| [150.0, -150.0, -125.0], | |
| ], | |
| dtype=np.float64, | |
| ) | |
| class HeadPoseEstimator: | |
| def __init__(self, max_angle: float = 30.0, roll_weight: float = 0.5): | |
| self.max_angle = max_angle | |
| self.roll_weight = roll_weight | |
| self._camera_matrix = None | |
| self._frame_size = None | |
| self._dist_coeffs = np.zeros((4, 1), dtype=np.float64) | |
| self._cache_key = None | |
| self._cache_result = None | |
| def _get_camera_matrix(self, frame_w: int, frame_h: int) -> np.ndarray: | |
| if self._camera_matrix is not None and self._frame_size == (frame_w, frame_h): | |
| return self._camera_matrix | |
| focal_length = float(frame_w) | |
| cx, cy = frame_w / 2.0, frame_h / 2.0 | |
| self._camera_matrix = np.array( | |
| [[focal_length, 0, cx], [0, focal_length, cy], [0, 0, 1]], | |
| dtype=np.float64, | |
| ) | |
| self._frame_size = (frame_w, frame_h) | |
| return self._camera_matrix | |
| def _solve(self, landmarks: np.ndarray, frame_w: int, frame_h: int): | |
| key = (landmarks.data.tobytes(), frame_w, frame_h) | |
| if self._cache_key == key: | |
| return self._cache_result | |
| image_points = np.array( | |
| [ | |
| [landmarks[i, 0] * frame_w, landmarks[i, 1] * frame_h] | |
| for i in _LANDMARK_INDICES | |
| ], | |
| dtype=np.float64, | |
| ) | |
| camera_matrix = self._get_camera_matrix(frame_w, frame_h) | |
| success, rvec, tvec = cv2.solvePnP( | |
| _MODEL_POINTS, | |
| image_points, | |
| camera_matrix, | |
| self._dist_coeffs, | |
| flags=cv2.SOLVEPNP_ITERATIVE, | |
| ) | |
| result = (success, rvec, tvec, image_points) | |
| self._cache_key = key | |
| self._cache_result = result | |
| return result | |
| def estimate( | |
| self, landmarks: np.ndarray, frame_w: int, frame_h: int | |
| ) -> tuple[float, float, float] | None: | |
| success, rvec, tvec, _ = self._solve(landmarks, frame_w, frame_h) | |
| if not success: | |
| return None | |
| rmat, _ = cv2.Rodrigues(rvec) | |
| nose_dir = rmat @ np.array([0.0, 0.0, 1.0]) | |
| face_up = rmat @ np.array([0.0, 1.0, 0.0]) | |
| yaw = math.degrees(math.atan2(nose_dir[0], -nose_dir[2])) | |
| pitch = math.degrees(math.asin(np.clip(-nose_dir[1], -1.0, 1.0))) | |
| roll = math.degrees(math.atan2(face_up[0], -face_up[1])) | |
| return (yaw, pitch, roll) | |
| def score(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> float: | |
| angles = self.estimate(landmarks, frame_w, frame_h) | |
| if angles is None: | |
| return 0.0 | |
| yaw, pitch, roll = angles | |
| deviation = math.sqrt(yaw**2 + pitch**2 + (self.roll_weight * roll) ** 2) | |
| t = min(deviation / self.max_angle, 1.0) | |
| return 0.5 * (1.0 + math.cos(math.pi * t)) | |
| def draw_axes( | |
| self, | |
| frame: np.ndarray, | |
| landmarks: np.ndarray, | |
| axis_length: float = 50.0, | |
| ) -> np.ndarray: | |
| h, w = frame.shape[:2] | |
| success, rvec, tvec, image_points = self._solve(landmarks, w, h) | |
| if not success: | |
| return frame | |
| camera_matrix = self._get_camera_matrix(w, h) | |
| nose = tuple(image_points[0].astype(int)) | |
| axes_3d = np.float64( | |
| [[axis_length, 0, 0], [0, axis_length, 0], [0, 0, axis_length]] | |
| ) | |
| projected, _ = cv2.projectPoints( | |
| axes_3d, rvec, tvec, camera_matrix, self._dist_coeffs | |
| ) | |
| colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0)] | |
| for i, color in enumerate(colors): | |
| pt = tuple(projected[i].ravel().astype(int)) | |
| cv2.line(frame, nose, pt, color, 2) | |
| return frame | |