focus_Guard_test / models /head_pose.py
Yingtao-Zheng's picture
MERGE EVERYTHING from feature/integration-cleaned :((((((((((((((((((((((((((((((((((((((((((((((( so tired
7053a3a
from __future__ import annotations
import math
import cv2
import numpy as np
_LANDMARK_INDICES = [1, 152, 33, 263, 61, 291]
_MODEL_POINTS = np.array(
[
[0.0, 0.0, 0.0],
[0.0, -330.0, -65.0],
[-225.0, 170.0, -135.0],
[225.0, 170.0, -135.0],
[-150.0, -150.0, -125.0],
[150.0, -150.0, -125.0],
],
dtype=np.float64,
)
class HeadPoseEstimator:
def __init__(self, max_angle: float = 30.0, roll_weight: float = 0.5):
self.max_angle = max_angle
self.roll_weight = roll_weight
self._camera_matrix = None
self._frame_size = None
self._dist_coeffs = np.zeros((4, 1), dtype=np.float64)
self._cache_key = None
self._cache_result = None
def _get_camera_matrix(self, frame_w: int, frame_h: int) -> np.ndarray:
if self._camera_matrix is not None and self._frame_size == (frame_w, frame_h):
return self._camera_matrix
focal_length = float(frame_w)
cx, cy = frame_w / 2.0, frame_h / 2.0
self._camera_matrix = np.array(
[[focal_length, 0, cx], [0, focal_length, cy], [0, 0, 1]],
dtype=np.float64,
)
self._frame_size = (frame_w, frame_h)
return self._camera_matrix
def _solve(self, landmarks: np.ndarray, frame_w: int, frame_h: int):
key = (landmarks.data.tobytes(), frame_w, frame_h)
if self._cache_key == key:
return self._cache_result
image_points = np.array(
[
[landmarks[i, 0] * frame_w, landmarks[i, 1] * frame_h]
for i in _LANDMARK_INDICES
],
dtype=np.float64,
)
camera_matrix = self._get_camera_matrix(frame_w, frame_h)
success, rvec, tvec = cv2.solvePnP(
_MODEL_POINTS,
image_points,
camera_matrix,
self._dist_coeffs,
flags=cv2.SOLVEPNP_ITERATIVE,
)
result = (success, rvec, tvec, image_points)
self._cache_key = key
self._cache_result = result
return result
def estimate(
self, landmarks: np.ndarray, frame_w: int, frame_h: int
) -> tuple[float, float, float] | None:
success, rvec, tvec, _ = self._solve(landmarks, frame_w, frame_h)
if not success:
return None
rmat, _ = cv2.Rodrigues(rvec)
nose_dir = rmat @ np.array([0.0, 0.0, 1.0])
face_up = rmat @ np.array([0.0, 1.0, 0.0])
yaw = math.degrees(math.atan2(nose_dir[0], -nose_dir[2]))
pitch = math.degrees(math.asin(np.clip(-nose_dir[1], -1.0, 1.0)))
roll = math.degrees(math.atan2(face_up[0], -face_up[1]))
return (yaw, pitch, roll)
def score(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> float:
angles = self.estimate(landmarks, frame_w, frame_h)
if angles is None:
return 0.0
yaw, pitch, roll = angles
deviation = math.sqrt(yaw**2 + pitch**2 + (self.roll_weight * roll) ** 2)
t = min(deviation / self.max_angle, 1.0)
return 0.5 * (1.0 + math.cos(math.pi * t))
def draw_axes(
self,
frame: np.ndarray,
landmarks: np.ndarray,
axis_length: float = 50.0,
) -> np.ndarray:
h, w = frame.shape[:2]
success, rvec, tvec, image_points = self._solve(landmarks, w, h)
if not success:
return frame
camera_matrix = self._get_camera_matrix(w, h)
nose = tuple(image_points[0].astype(int))
axes_3d = np.float64(
[[axis_length, 0, 0], [0, axis_length, 0], [0, 0, axis_length]]
)
projected, _ = cv2.projectPoints(
axes_3d, rvec, tvec, camera_matrix, self._dist_coeffs
)
colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0)]
for i, color in enumerate(colors):
pt = tuple(projected[i].ravel().astype(int))
cv2.line(frame, nose, pt, color, 2)
return frame