Spaces:
Sleeping
Sleeping
File size: 3,979 Bytes
c86c45b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 | import math
import cv2
import numpy as np
_LANDMARK_INDICES = [1, 152, 33, 263, 61, 291]
_MODEL_POINTS = np.array(
[
[0.0, 0.0, 0.0],
[0.0, -330.0, -65.0],
[-225.0, 170.0, -135.0],
[225.0, 170.0, -135.0],
[-150.0, -150.0, -125.0],
[150.0, -150.0, -125.0],
],
dtype=np.float64,
)
class HeadPoseEstimator:
def __init__(self, max_angle: float = 30.0, roll_weight: float = 0.5):
self.max_angle = max_angle
self.roll_weight = roll_weight
self._camera_matrix = None
self._frame_size = None
self._dist_coeffs = np.zeros((4, 1), dtype=np.float64)
self._cache_key = None
self._cache_result = None
def _get_camera_matrix(self, frame_w: int, frame_h: int) -> np.ndarray:
if self._camera_matrix is not None and self._frame_size == (frame_w, frame_h):
return self._camera_matrix
focal_length = float(frame_w)
cx, cy = frame_w / 2.0, frame_h / 2.0
self._camera_matrix = np.array(
[[focal_length, 0, cx], [0, focal_length, cy], [0, 0, 1]],
dtype=np.float64,
)
self._frame_size = (frame_w, frame_h)
return self._camera_matrix
def _solve(self, landmarks: np.ndarray, frame_w: int, frame_h: int):
key = (landmarks.data.tobytes(), frame_w, frame_h)
if self._cache_key == key:
return self._cache_result
image_points = np.array(
[
[landmarks[i, 0] * frame_w, landmarks[i, 1] * frame_h]
for i in _LANDMARK_INDICES
],
dtype=np.float64,
)
camera_matrix = self._get_camera_matrix(frame_w, frame_h)
success, rvec, tvec = cv2.solvePnP(
_MODEL_POINTS,
image_points,
camera_matrix,
self._dist_coeffs,
flags=cv2.SOLVEPNP_ITERATIVE,
)
result = (success, rvec, tvec, image_points)
self._cache_key = key
self._cache_result = result
return result
def estimate(
self, landmarks: np.ndarray, frame_w: int, frame_h: int
) -> tuple[float, float, float] | None:
success, rvec, tvec, _ = self._solve(landmarks, frame_w, frame_h)
if not success:
return None
rmat, _ = cv2.Rodrigues(rvec)
nose_dir = rmat @ np.array([0.0, 0.0, 1.0])
face_up = rmat @ np.array([0.0, 1.0, 0.0])
yaw = math.degrees(math.atan2(nose_dir[0], -nose_dir[2]))
pitch = math.degrees(math.asin(np.clip(-nose_dir[1], -1.0, 1.0)))
roll = math.degrees(math.atan2(face_up[0], -face_up[1]))
return (yaw, pitch, roll)
def score(self, landmarks: np.ndarray, frame_w: int, frame_h: int) -> float:
angles = self.estimate(landmarks, frame_w, frame_h)
if angles is None:
return 0.0
yaw, pitch, roll = angles
deviation = math.sqrt(yaw**2 + pitch**2 + (self.roll_weight * roll) ** 2)
t = min(deviation / self.max_angle, 1.0)
return 0.5 * (1.0 + math.cos(math.pi * t))
def draw_axes(
self,
frame: np.ndarray,
landmarks: np.ndarray,
axis_length: float = 50.0,
) -> np.ndarray:
h, w = frame.shape[:2]
success, rvec, tvec, image_points = self._solve(landmarks, w, h)
if not success:
return frame
camera_matrix = self._get_camera_matrix(w, h)
nose = tuple(image_points[0].astype(int))
axes_3d = np.float64(
[[axis_length, 0, 0], [0, axis_length, 0], [0, 0, axis_length]]
)
projected, _ = cv2.projectPoints(
axes_3d, rvec, tvec, camera_matrix, self._dist_coeffs
)
colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0)]
for i, color in enumerate(colors):
pt = tuple(projected[i].ravel().astype(int))
cv2.line(frame, nose, pt, color, 2)
return frame
|