| | import mediapipe as mp |
| | from mediapipe import solutions |
| | from mediapipe.framework.formats import landmark_pb2 |
| | import numpy as np |
| | import cv2 |
| |
|
| |
|
def convert_bbox_to_square_bbox(bbox, max_h, max_w, scale=1.0):
    """Expand a [(x_min, y_min), (x_max, y_max)] bbox into a centered square.

    The square shares the input box's center, its side is the larger of the
    box's width/height multiplied by ``scale``, and the result is clipped to
    the image extent (so it can end up non-square at the borders).

    Args:
        bbox: [(x_min, y_min), (x_max, y_max)] corner pair.
        max_h: image height used as the lower-right y clamp.
        max_w: image width used as the lower-right x clamp.
        scale: enlargement factor applied to the square's side.

    Returns:
        [x_min, y_min, x_max, y_max] as ints, clipped to [0, max_w] x [0, max_h].
    """
    (x0, y0), (x1, y1) = bbox
    side = max(x1 - x0, y1 - y0) * scale
    half = side / 2
    cx = (x0 + x1) / 2
    cy = (y0 + y1) / 2

    # Truncate to int first (matching int() semantics), then clamp into frame.
    x_min = max(0, int(cx - half))
    y_min = max(0, int(cy - half))
    x_max = min(max_w, int(cx + half))
    y_max = min(max_h, int(cy + half))

    return [x_min, y_min, x_max, y_max]
| |
|
| |
|
def draw_landmarks_on_image(rgb_image, detection_result):
    """Render the MediaPipe face-mesh overlays for every detected face.

    Draws three overlays per face — tesselation, contours, and iris
    connections — using the default drawing styles.

    Args:
        rgb_image: HxWx3 numpy image to annotate (a copy is drawn on; the
            input is not mutated).
        detection_result: FaceLandmarker result exposing ``face_landmarks``.

    Returns:
        A copy of ``rgb_image`` with the landmark overlays drawn on it.
    """
    annotated = np.copy(rgb_image)

    for face_landmarks in detection_result.face_landmarks:
        # The tasks-API landmarks must be repackaged into the legacy
        # NormalizedLandmarkList proto expected by solutions.drawing_utils.
        proto = landmark_pb2.NormalizedLandmarkList()
        proto.landmark.extend(
            landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z)
            for lm in face_landmarks
        )

        # (connection set, per-connection style) for each of the three overlays.
        overlays = [
            (solutions.face_mesh.FACEMESH_TESSELATION,
             solutions.drawing_styles.get_default_face_mesh_tesselation_style()),
            (solutions.face_mesh.FACEMESH_CONTOURS,
             solutions.drawing_styles.get_default_face_mesh_contours_style()),
            (solutions.face_mesh.FACEMESH_IRISES,
             solutions.drawing_styles.get_default_face_mesh_iris_connections_style()),
        ]
        for connections, style in overlays:
            solutions.drawing_utils.draw_landmarks(
                image=annotated,
                landmark_list=proto,
                connections=connections,
                landmark_drawing_spec=None,
                connection_drawing_spec=style,
            )

    return annotated
| |
|
| |
|
class FaceDetector:
    """Face detection/landmarking via the MediaPipe FaceLandmarker task.

    Wraps a FaceLandmarker configured in single-image mode and derives
    per-face geometry from the landmark output: pixel bounding boxes, coarse
    yaw orientation, mouth/eye/nose region boxes, blendshape scores,
    transformation matrices, and (in the extended method) iris "eyeball"
    masks and region center points.

    Fixes over the previous revision:
      * the y component of the mouth/nose/eye center points previously
        averaged x_max with y_max instead of y_min with y_max — every
        returned ``*_p`` point had a wrong y coordinate;
      * the two public methods shared ~500 duplicated lines, now factored
        into class constants and private helpers;
      * removed a redundant in-function ``import cv2`` (already imported at
        module level).
    """

    # Flattened endpoint pairs of the FaceMesh lip connections. Duplicates are
    # harmless: the list is only used for bbox min/max and membership tests.
    _MOUTH_LANDMARKS = [
        61, 146, 146, 91, 91, 181, 181, 84, 84, 17, 17, 314, 314, 405, 405, 321,
        321, 375, 375, 291, 61, 185, 185, 40, 40, 39, 39, 37, 37, 0, 0, 267,
        267, 269, 269, 270, 270, 409, 409, 291, 78, 95, 95, 88, 88, 178, 178, 87,
        87, 14, 14, 317, 317, 402, 402, 318, 318, 324, 324, 308, 78, 191, 191, 80,
        80, 81, 81, 82, 82, 13, 13, 312, 312, 311, 311, 310, 310, 415, 415, 308,
    ]
    _NOSE_LANDMARKS = [48, 115, 220, 45, 4, 275, 440, 344, 278]
    _LEFT_EYE_LANDMARKS = [362, 398, 384, 385, 386, 387, 388, 466, 263, 249, 390, 373, 374, 380, 381, 382]
    _RIGHT_EYE_LANDMARKS = [33, 246, 161, 160, 159, 158, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7]

    def __init__(self, mediapipe_model_asset_path, delegate=1, face_detection_confidence=0.5, num_faces=5):
        """Create a FaceLandmarker in single-image (IMAGE) running mode.

        Args:
            mediapipe_model_asset_path: path to the FaceLandmarker ``.task`` model.
            delegate: MediaPipe BaseOptions delegate selector (1 presumably
                selects the GPU delegate — TODO confirm against the installed
                mediapipe version).
            face_detection_confidence: single threshold reused for detection,
                presence, and tracking confidence.
            num_faces: maximum number of faces to detect per image.
        """
        options = mp.tasks.vision.FaceLandmarkerOptions(
            base_options=mp.tasks.BaseOptions(
                model_asset_path=mediapipe_model_asset_path,
                delegate=delegate,
            ),
            running_mode=mp.tasks.vision.RunningMode.IMAGE,
            num_faces=num_faces,
            output_face_blendshapes=True,
            output_facial_transformation_matrixes=True,
            min_face_detection_confidence=face_detection_confidence,
            min_face_presence_confidence=face_detection_confidence,
            min_tracking_confidence=face_detection_confidence,
        )
        self.detector = mp.tasks.vision.FaceLandmarker.create_from_options(options)

    # ------------------------------------------------------------------ #
    # Private helpers shared by both public methods.
    # ------------------------------------------------------------------ #

    @staticmethod
    def _pixel_bbox(face_landmarks, indices, w, h):
        """Axis-aligned pixel bbox [(x_min, y_min), (x_max, y_max)] over the
        landmarks selected by ``indices`` (normalized coords clipped to [0, 1])."""
        xs = [int(np.clip(face_landmarks[i].x, 0, 1) * w) for i in indices]
        ys = [int(np.clip(face_landmarks[i].y, 0, 1) * h) for i in indices]
        return [(min(xs), min(ys)), (max(xs), max(ys))]

    @staticmethod
    def _bbox_center(bbox):
        """Center of a [(x_min, y_min), (x_max, y_max)] bbox as a float array.

        BUG FIX: the y coordinate is now (y_min + y_max) / 2; it previously
        averaged x_max with y_max.
        """
        return np.array([
            (bbox[0][0] + bbox[1][0]) / 2,
            (bbox[0][1] + bbox[1][1]) / 2,
        ])

    @staticmethod
    def _face_pose(face_landmarks, w, h):
        """Per-face keypoints, pixel bounding box, and coarse yaw label.

        ``x``/``y`` are the normalized offsets of the nose tip (landmark 4)
        from the midpoint of the two cheek landmarks (137 and 366); ``x``
        beyond +/-0.15 is labeled "left"/"right", otherwise "forward".

        Returns:
            (keypoints, bounding_box, x, y, orientation)
        """
        keypoints = [(lm.x, lm.y, lm.z) for lm in face_landmarks]
        xs = [int(np.clip(lm.x, 0, 1) * w) for lm in face_landmarks]
        ys = [int(np.clip(lm.y, 0, 1) * h) for lm in face_landmarks]
        bounding_box = [(min(xs), min(ys)), (max(xs), max(ys))]

        right_cheek = keypoints[137]
        left_cheek = keypoints[366]
        nose = keypoints[4]
        x = nose[0] - (right_cheek[0] + left_cheek[0]) / 2.0
        y = nose[1] - (right_cheek[1] + left_cheek[1]) / 2.0

        if x > 0.15:
            orientation = "left"
        elif x < -0.15:
            orientation = "right"
        else:
            orientation = "forward"
        return keypoints, bounding_box, x, y, orientation

    @staticmethod
    def _face_contour_image(image, face_landmarks, w, h, max_h, max_w):
        """Black image with one white pixel at every landmark inside the frame."""
        face_contour = np.zeros_like(image)
        for landmark in face_landmarks:
            cx, cy = int(landmark.x * w), int(landmark.y * h)
            if 0 <= cx < max_w and 0 <= cy < max_h:
                face_contour[cy, cx] = (255, 255, 255)
        return face_contour

    def _save_debug_vis(self, image, face_landmarks, mouth_bbox, eye_bbox, w, h):
        """Draw mouth/eye boxes plus all landmarks on ``image`` (mutated in
        place) and dump it to ``image_detect.png`` with an RGB->BGR flip."""
        x_min, y_min, x_max, y_max = mouth_bbox
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 0, 255), 2)

        for bbox in eye_bbox.values():
            x_min, y_min, x_max, y_max = bbox
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 0, 255), 2)

        eye_ids = self._LEFT_EYE_LANDMARKS + self._RIGHT_EYE_LANDMARKS
        for landmark_id, landmark in enumerate(face_landmarks):
            cx, cy = int(landmark.x * w), int(landmark.y * h)
            if landmark_id in self._MOUTH_LANDMARKS:
                color = (0, 0, 255)
            elif landmark_id in eye_ids:
                color = (0, 255, 0)
            else:
                color = (255, 255, 255)
            cv2.circle(image, (cx, cy), 2, color, -1)
        cv2.imwrite('image_detect.png', image[:, :, ::-1])

    # ------------------------------------------------------------------ #
    # Public API.
    # ------------------------------------------------------------------ #

    def get_one_face_xy_rotation_and_keypoints(self, image, mouth_bbox_scale=1.2, eye_bbox_scale=1.5, annotate_image: bool = False, save_vis=False):
        """Detect faces and derive full per-face geometry, including iris masks.

        Args:
            image: HxWx3 RGB numpy image.
            mouth_bbox_scale: enlargement factor for the square mouth crop.
            eye_bbox_scale: enlargement factor for the square eye crops.
            annotate_image: also render the full landmark overlay image.
            save_vis: dump a debug visualization to ``image_detect.png``
                (mutates ``image`` in place).

        Returns:
            An 18-tuple of per-face lists:
            (all_x, all_y, all_orientation, num_faces, all_keypoints,
             all_bounding_box, all_mouth_bounding_box, all_eye_bounding_box,
             all_face_contour, all_blendshapes,
             all_facial_transformation_matrices, annotated_image,
             all_mouth_p, all_nose_p, all_left_eye_p, all_right_eye_p,
             all_eyeball, all_eyeball_mask)
        """
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)
        results = self.detector.detect(mp_image)
        max_h, max_w = image.shape[:2]
        h, w = max_h, max_w

        annotated_image = draw_landmarks_on_image(image, results) if annotate_image else None

        all_x, all_y, all_orientation = [], [], []
        all_keypoints, all_bounding_box = [], []
        all_mouth_bounding_box, all_eye_bounding_box = [], []
        all_face_contour = []
        all_eyeball, all_eyeball_mask = [], []
        all_mouth_p, all_nose_p, all_left_eye_p, all_right_eye_p = [], [], [], []
        num_faces = len(results.face_landmarks)

        all_blendshapes = [
            [item.score for item in face_blendshapes]
            for face_blendshapes in results.face_blendshapes
        ]
        all_facial_transformation_matrices = results.facial_transformation_matrixes

        for face_landmarks in results.face_landmarks:
            keypoints, bounding_box, x, y, orientation = self._face_pose(face_landmarks, w, h)
            all_keypoints.append(keypoints)
            all_bounding_box.append(bounding_box)
            all_x.append(x)
            all_y.append(y)
            all_orientation.append(orientation)

            # Mouth: center point is taken from the tight bbox, then the bbox
            # is squared/scaled for cropping.
            mouth_bbox = self._pixel_bbox(face_landmarks, self._MOUTH_LANDMARKS, w, h)
            mouth_p = self._bbox_center(mouth_bbox)
            mouth_bbox = convert_bbox_to_square_bbox(mouth_bbox, max_h, max_w, scale=mouth_bbox_scale)
            all_mouth_bounding_box.append(mouth_bbox)

            nose_bbox = self._pixel_bbox(face_landmarks, self._NOSE_LANDMARKS, w, h)
            nose_p = self._bbox_center(nose_bbox)

            left_eye_bbox = self._pixel_bbox(face_landmarks, self._LEFT_EYE_LANDMARKS, w, h)
            left_size = left_eye_bbox[1][1] - left_eye_bbox[0][1]  # eye-opening height in px
            left_eye_p = self._bbox_center(left_eye_bbox)
            left_eye_bbox = convert_bbox_to_square_bbox(left_eye_bbox, max_h, max_w, scale=eye_bbox_scale)

            right_eye_bbox = self._pixel_bbox(face_landmarks, self._RIGHT_EYE_LANDMARKS, w, h)
            right_size = right_eye_bbox[1][1] - right_eye_bbox[0][1]
            right_eye_p = self._bbox_center(right_eye_bbox)
            right_eye_bbox = convert_bbox_to_square_bbox(right_eye_bbox, max_h, max_w, scale=eye_bbox_scale)

            eye_bbox = {"left_eye": left_eye_bbox, "right_eye": right_eye_bbox}
            all_eye_bounding_box.append(eye_bbox)

            all_face_contour.append(self._face_contour_image(image, face_landmarks, w, h, max_h, max_w))

            # Iris disks at landmarks 468/473 (present only when the model
            # outputs the refined iris points); radius is a third of the
            # corresponding eye-opening height.
            eyeball = np.zeros_like(image)
            for landmark_id, landmark in enumerate(face_landmarks):
                if landmark_id not in (468, 473):
                    continue
                cx, cy = int(landmark.x * w), int(landmark.y * h)
                if not (0 <= cx < max_w and 0 <= cy < max_h):
                    continue
                size = left_size if landmark_id == 468 else right_size
                cv2.circle(eyeball, (cx, cy), radius=int(size // 3), color=(255, 0, 0), thickness=-1)
            all_eyeball.append(eyeball)
            all_eyeball_mask.append((eyeball.sum(axis=2) != 0)[:, :, None])

            all_mouth_p.append(mouth_p)
            all_nose_p.append(nose_p)
            all_left_eye_p.append(left_eye_p)
            all_right_eye_p.append(right_eye_p)

            if save_vis:
                self._save_debug_vis(image, face_landmarks, mouth_bbox, eye_bbox, w, h)

        return (
            all_x,
            all_y,
            all_orientation,
            num_faces,
            all_keypoints,
            all_bounding_box,
            all_mouth_bounding_box,
            all_eye_bounding_box,
            all_face_contour,
            all_blendshapes,
            all_facial_transformation_matrices,
            annotated_image,
            all_mouth_p,
            all_nose_p,
            all_left_eye_p,
            all_right_eye_p,
            all_eyeball,
            all_eyeball_mask,
        )

    def get_face_xy_rotation_and_keypoints(self, image, mouth_bbox_scale=1.2, eye_bbox_scale=1.5, annotate_image: bool = False, save_vis=False):
        """Detect faces and derive per-face geometry (no iris masks / centers).

        Same pipeline as :meth:`get_one_face_xy_rotation_and_keypoints` but
        returns only the first 12 elements of that method's tuple:
        (all_x, all_y, all_orientation, num_faces, all_keypoints,
         all_bounding_box, all_mouth_bounding_box, all_eye_bounding_box,
         all_face_contour, all_blendshapes,
         all_facial_transformation_matrices, annotated_image)
        """
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)
        results = self.detector.detect(mp_image)
        max_h, max_w = image.shape[:2]
        h, w = max_h, max_w

        annotated_image = draw_landmarks_on_image(image, results) if annotate_image else None

        all_x, all_y, all_orientation = [], [], []
        all_keypoints, all_bounding_box = [], []
        all_mouth_bounding_box, all_eye_bounding_box = [], []
        all_face_contour = []
        num_faces = len(results.face_landmarks)

        all_blendshapes = [
            [item.score for item in face_blendshapes]
            for face_blendshapes in results.face_blendshapes
        ]
        all_facial_transformation_matrices = results.facial_transformation_matrixes

        for face_landmarks in results.face_landmarks:
            keypoints, bounding_box, x, y, orientation = self._face_pose(face_landmarks, w, h)
            all_keypoints.append(keypoints)
            all_bounding_box.append(bounding_box)
            all_x.append(x)
            all_y.append(y)
            all_orientation.append(orientation)

            mouth_bbox = self._pixel_bbox(face_landmarks, self._MOUTH_LANDMARKS, w, h)
            mouth_bbox = convert_bbox_to_square_bbox(mouth_bbox, max_h, max_w, scale=mouth_bbox_scale)
            all_mouth_bounding_box.append(mouth_bbox)

            left_eye_bbox = self._pixel_bbox(face_landmarks, self._LEFT_EYE_LANDMARKS, w, h)
            left_eye_bbox = convert_bbox_to_square_bbox(left_eye_bbox, max_h, max_w, scale=eye_bbox_scale)

            right_eye_bbox = self._pixel_bbox(face_landmarks, self._RIGHT_EYE_LANDMARKS, w, h)
            right_eye_bbox = convert_bbox_to_square_bbox(right_eye_bbox, max_h, max_w, scale=eye_bbox_scale)

            eye_bbox = {"left_eye": left_eye_bbox, "right_eye": right_eye_bbox}
            all_eye_bounding_box.append(eye_bbox)

            all_face_contour.append(self._face_contour_image(image, face_landmarks, w, h, max_h, max_w))

            if save_vis:
                self._save_debug_vis(image, face_landmarks, mouth_bbox, eye_bbox, w, h)

        return (
            all_x,
            all_y,
            all_orientation,
            num_faces,
            all_keypoints,
            all_bounding_box,
            all_mouth_bounding_box,
            all_eye_bounding_box,
            all_face_contour,
            all_blendshapes,
            all_facial_transformation_matrices,
            annotated_image,
        )