"""
Face detection and cropping utilities for 3D face reconstruction.

This module provides functions for face detection, cropping, and preprocessing
to align faces with training data specifications.
"""
| |
|
| | from typing import Tuple, Optional, Dict, Any |
| | import numpy as np |
| | import torch |
| | from PIL import Image |
| | from facenet_pytorch import MTCNN |
| | from rembg import remove |
| |
|
| | |
# Face size/center statistics measured on the training set; at inference time
# input faces are rescaled and recentred to match these values so they align
# with what the reconstruction model saw during training.
TRAINING_SET_FACE_SIZE = 194.2749650813705
TRAINING_SET_FACE_CENTER = [251.83270369057132, 280.0133630862363]

# Alignment targets used by crop_face(); aliases of the statistics above.
FACE_SIZE = TRAINING_SET_FACE_SIZE
FACE_CENTER = TRAINING_SET_FACE_CENTER
# Solid white used when flattening alpha channels and padding crops.
DEFAULT_BACKGROUND_COLOR = (255, 255, 255)
# Default square output resolution in pixels.
DEFAULT_IMG_SIZE = 512

# Run detection on GPU when available, otherwise fall back to CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Module-level MTCNN detector shared by all calls; constructed once at import.
FACE_DETECTOR = MTCNN(
    image_size=512,
    margin=0,
    min_face_size=20,           # smallest face (px) the cascade will report
    thresholds=[0.6, 0.7, 0.7], # per-stage cascade confidence thresholds
    factor=0.709,               # image-pyramid scale factor between levels
    post_process=True,
    device=DEVICE
)
| |
|
def select_face(
    detected_bounding_boxes: Optional[np.ndarray],
    confidence_scores: Optional[np.ndarray],
    confidence_threshold: float = 0.8,
) -> Optional[np.ndarray]:
    """
    Select the largest face from detected faces with confidence above threshold.

    Args:
        detected_bounding_boxes: Detected bounding boxes in xyxy format
        confidence_scores: Detection confidence probabilities
        confidence_threshold: Minimum detection probability for a face to be
            considered (default 0.8, matching the previous hard-coded value)

    Returns:
        Selected bounding box or None if no suitable face found
    """
    if detected_bounding_boxes is None or confidence_scores is None:
        return None

    # Keep only detections that clear the confidence threshold.
    high_confidence_faces = [
        bbox
        for bbox, score in zip(detected_bounding_boxes, confidence_scores)
        if score > confidence_threshold
    ]

    if not high_confidence_faces:
        return None

    # Prefer the face with the largest bounding-box area (width * height).
    return max(high_confidence_faces, key=lambda bbox: (bbox[3] - bbox[1]) * (bbox[2] - bbox[0]))
| |
|
def crop_face(
    input_image_array: np.ndarray,
    face_detector: MTCNN = FACE_DETECTOR,
    target_face_size: float = FACE_SIZE,
    target_face_center: list = FACE_CENTER,
    output_image_size: int = 512,
    background_color: Tuple[int, int, int] = (255, 255, 255)
) -> Tuple[Image.Image, Dict[str, Any]]:
    """
    Crop and align face in image to match training data specifications.

    The image is uniformly rescaled so the detected face matches
    ``target_face_size``, then pasted onto a solid-color canvas so the face
    lands at ``target_face_center``.

    Args:
        input_image_array: Input image as numpy array (H, W, C); C may be 3
            (RGB) or 4 (RGBA — alpha is flattened onto ``background_color``)
        face_detector: MTCNN face detector instance
        target_face_size: Target face size from training data
        target_face_center: Target face center (x, y) from training data
        output_image_size: Output canvas size (square, pixels)
        background_color: Background color for padding

    Returns:
        Tuple of (cropped_image, crop_parameters) where crop_parameters has
        keys 'resize_ratio', 'x_offset_left', 'y_offset_top' — enough to map
        output coordinates back to the original image.

    Raises:
        ValueError: If no face is detected in the image
    """
    image_height, image_width, _ = input_image_array.shape

    # Flatten RGBA input onto a solid background so detection sees plain RGB.
    if input_image_array.shape[2] == 4:
        rgba_pil_image = Image.fromarray(input_image_array)
        background_image = Image.new("RGB", rgba_pil_image.size, background_color)
        rgb_composite_image = Image.alpha_composite(background_image.convert("RGBA"), rgba_pil_image).convert("RGB")
        processed_image_array = np.array(rgb_composite_image)
    else:
        processed_image_array = input_image_array[:, :, :3]

    # Detect faces and keep the largest high-confidence one.
    detected_bounding_boxes, confidence_scores = face_detector.detect(processed_image_array)
    selected_face_bbox = select_face(detected_bounding_boxes, confidence_scores)
    if selected_face_bbox is None:
        raise ValueError("No face detected in the image")

    # Face "size" is the mean of the bbox width and height; center is the
    # bbox midpoint. Both are in original-image pixel coordinates.
    detected_face_size = 0.5 * (selected_face_bbox[2] - selected_face_bbox[0] + selected_face_bbox[3] - selected_face_bbox[1])
    detected_face_center = (
        0.5 * (selected_face_bbox[0] + selected_face_bbox[2]),
        0.5 * (selected_face_bbox[1] + selected_face_bbox[3])
    )

    # Uniformly rescale the whole image so the face reaches the target size.
    scale_ratio = target_face_size / detected_face_size
    scaled_width, scaled_height = int(image_width * scale_ratio), int(image_height * scale_ratio)
    scaled_pil_image = Image.fromarray(processed_image_array).resize((scaled_width, scaled_height))
    scaled_face_center = (
        int(detected_face_center[0] * scale_ratio),
        int(detected_face_center[1] * scale_ratio)
    )

    # Solid-color canvas the scaled image is pasted onto.
    output_image = Image.new("RGB", (output_image_size, output_image_size), color=background_color)

    # Translation that moves the scaled face center onto the target center.
    horizontal_offset = target_face_center[0] - scaled_face_center[0]
    vertical_offset = target_face_center[1] - scaled_face_center[1]

    # Visible region of the scaled image after the translation; int() here
    # truncates toward zero (offsets may be non-integer floats).
    crop_left_boundary = int(max(0, -horizontal_offset))
    crop_top_boundary = int(max(0, -vertical_offset))
    crop_right_boundary = int(min(scaled_width, output_image_size - horizontal_offset))
    crop_bottom_boundary = int(min(scaled_height, output_image_size - vertical_offset))

    # Crop the visible region and paste it at the (clamped) offset; areas the
    # scaled image does not cover stay background_color.
    cropped_face_image = scaled_pil_image.crop((crop_left_boundary, crop_top_boundary, crop_right_boundary, crop_bottom_boundary))
    paste_coordinates = (int(max(0, horizontal_offset)), int(max(0, vertical_offset)))
    output_image.paste(cropped_face_image, paste_coordinates)

    # Record the transform so callers can invert the crop if needed.
    crop_parameters = {
        'resize_ratio': scale_ratio,
        'x_offset_left': horizontal_offset,
        'y_offset_top': vertical_offset,
    }

    return output_image, crop_parameters
| |
|
def prepare_foreground_with_rembg(input_image_array: np.ndarray) -> np.ndarray:
    """
    Prepare foreground image using rembg for background removal.

    Args:
        input_image_array: Input image as numpy array (H, W, C)

    Returns:
        RGBA image as numpy array with background removed
    """
    # Run rembg's background removal on a PIL view of the input.
    matted_array = np.array(remove(Image.fromarray(input_image_array)))

    channel_count = matted_array.shape[2]
    if channel_count == 4:
        # Already RGBA — return as-is.
        return matted_array
    if channel_count == 3:
        # Promote RGB to RGBA by appending a fully opaque alpha channel.
        height, width = matted_array.shape[:2]
        opaque_alpha = np.full((height, width, 1), 255, dtype=np.uint8)
        return np.concatenate(
            [matted_array.astype(np.uint8), opaque_alpha], axis=2
        )

    # Unexpected channel count: pass the result through unchanged.
    return matted_array
| |
|
def preprocess_image(
    original_image_array: np.ndarray,
    target_image_size: int = DEFAULT_IMG_SIZE,
    background_color: Tuple[int, int, int] = DEFAULT_BACKGROUND_COLOR
) -> Image.Image:
    """
    Preprocess image with background removal and face cropping.

    Args:
        original_image_array: Input image as numpy array
        target_image_size: Target output image size (square)
        background_color: Background color used when flattening the alpha
            channel and for padding around the cropped face

    Returns:
        Processed PIL Image of size (target_image_size, target_image_size)

    Raises:
        ValueError: If no face is detected (propagated from crop_face)
    """
    processed_image_array = prepare_foreground_with_rembg(original_image_array)

    # Flatten the RGBA matte onto a solid background so the face detector
    # receives a plain RGB image.
    if processed_image_array.shape[2] == 4:
        rgba_pil_image = Image.fromarray(processed_image_array)
        background_image = Image.new("RGB", rgba_pil_image.size, background_color)
        rgb_composite_image = Image.alpha_composite(
            background_image.convert("RGBA"), rgba_pil_image
        ).convert("RGB")
        processed_image_array = np.array(rgb_composite_image)

    # Crop parameters are not needed here; discard them explicitly.
    cropped_image, _ = crop_face(
        processed_image_array,
        face_detector=FACE_DETECTOR,
        target_face_size=FACE_SIZE,
        target_face_center=FACE_CENTER,
        output_image_size=target_image_size,
        background_color=background_color,
    )
    return cropped_image
| |
|
def preprocess_image_without_cropping(
    original_image_array: np.ndarray,
    target_image_size: int = DEFAULT_IMG_SIZE,
    background_color: Tuple[int, int, int] = DEFAULT_BACKGROUND_COLOR
) -> Image.Image:
    """
    Preprocess image with background removal, without face cropping.

    Args:
        original_image_array: Input image as numpy array
        target_image_size: Target image size
        background_color: Background color for compositing

    Returns:
        Processed PIL Image
    """
    foreground_rgba = prepare_foreground_with_rembg(original_image_array)

    # Resize the matted foreground to the requested square size, composite it
    # over a solid-color canvas, and drop the alpha channel.
    canvas_size = (target_image_size, target_image_size)
    foreground_resized = Image.fromarray(foreground_rgba).resize(canvas_size)
    canvas = Image.new("RGBA", canvas_size, background_color)
    return Image.alpha_composite(canvas, foreground_resized).convert("RGB")
| |
|