# File size: 16,587 Bytes

# 5dccaf9

"""
Synthetic Data Generator for Eye Gaze Training

Generates realistic synthetic training data that simulates:
1. Eye crops with various iris positions (gaze directions)
2. Dark / low-light conditions
3. Glasses overlays
4. Lazy eye / strabismus (asymmetric eye gaze)
5. Various skin tones, eye colors
6. Sensor noise (CMOS simulation)
7. Illumination perturbation (directional light gradients)

Based on augmentation strategies from:
- AGE framework (arxiv:2603.26945) - GlassesGAN, illumination perturbation, sensor noise
- UnityEyes approach - synthetic eye rendering with parametric control
"""

import numpy as np
import tensorflow as tf
from PIL import Image, ImageDraw, ImageFilter
import random
import math


class SyntheticGazeDataGenerator:
    """
    Generate synthetic eye and face crops paired with known gaze labels.

    Each sample is a dict containing:
      - left_eye:  (img_size, img_size, 3) float32 RGB crop scaled to [0, 1]
      - right_eye: (img_size, img_size, 3) float32 RGB crop scaled to [0, 1]
      - face:      (img_size, img_size, 3) float32 RGB crop scaled to [0, 1]
      - gaze_x, gaze_y: float32 normalized screen coordinates, sampled
        uniformly from [0.05, 0.95]

    All randomness is drawn from a single seeded ``np.random.RandomState``,
    so the same seed and the same sequence of calls reproduce the same data.
    """

    def __init__(self, img_size=64, seed=42):
        """
        Set up the random state and the appearance palettes.

        Args:
            img_size: Side length in pixels of every square crop that is drawn.
            seed: Seed for the internal ``np.random.RandomState``.
        """
        self.img_size = img_size
        self.rng = np.random.RandomState(seed)

        # Skin tone palette (RGB), ordered light to dark.
        self.skin_tones = [
            (255, 224, 189), (255, 205, 148), (234, 192, 134),
            (255, 173, 96),  (210, 153, 83),  (187, 131, 71),
            (156, 102, 52),  (128, 80, 37),   (100, 64, 30),
            (74, 46, 21),    (60, 38, 18),    (45, 30, 15),
        ]

        # Iris colors (RGB).
        self.eye_colors = [
            (50, 30, 10),    # Dark brown
            (100, 60, 20),   # Light brown
            (40, 80, 40),    # Green
            (30, 50, 100),   # Blue
            (50, 50, 50),    # Grey
            (80, 40, 10),    # Hazel
            (20, 20, 20),    # Very dark
        ]

        # Glasses frame colors (RGB).
        self.glasses_colors = [
            (0, 0, 0),       # Black
            (60, 40, 20),    # Brown
            (100, 100, 100), # Silver/grey
            (0, 0, 60),      # Dark blue
            (80, 0, 0),      # Dark red
        ]

    def _draw_eye(self, gaze_x, gaze_y, skin_tone, eye_color, eye_openness=1.0,
                  lazy_offset_x=0.0, lazy_offset_y=0.0):
        """
        Draw one synthetic eye whose iris position encodes the gaze target.

        Args:
            gaze_x, gaze_y: Gaze target in normalized screen coordinates;
                0.5 centres the iris, 0 and 1 push it to the displacement limit.
            skin_tone: RGB tuple used for the background and the eyelids.
            eye_color: RGB tuple used for the iris.
            eye_openness: Multiplier on the sclera height (1.0 = fully open).
            lazy_offset_x, lazy_offset_y: Extra iris displacement, expressed as
                a fraction of the maximum displacement, used to simulate
                strabismus.

        Returns:
            float32 array of shape (img_size, img_size, 3) with values in [0, 255].
        """
        size = self.img_size
        img = Image.new('RGB', (size, size), skin_tone)
        draw = ImageDraw.Draw(img)

        cx, cy = size // 2, size // 2

        # Eye white (sclera) - elliptical shape
        eye_w = int(size * 0.75)
        eye_h = int(size * 0.35 * eye_openness)
        sclera_bbox = [cx - eye_w // 2, cy - eye_h // 2,
                       cx + eye_w // 2, cy + eye_h // 2]
        draw.ellipse(sclera_bbox, fill=(240, 240, 240), outline=(180, 150, 130))

        # Iris position: gaze in [0, 1] is mapped linearly to a displacement of
        # at most +/- max_disp around the eye centre; gaze (0, 0) moves the iris
        # up-left in the image and (1, 1) moves it down-right.
        max_disp_x = eye_w * 0.25
        max_disp_y = eye_h * 0.2

        iris_offset_x = (gaze_x - 0.5) * 2 * max_disp_x + lazy_offset_x * max_disp_x
        iris_offset_y = (gaze_y - 0.5) * 2 * max_disp_y + lazy_offset_y * max_disp_y

        iris_cx = cx + iris_offset_x
        iris_cy = cy + iris_offset_y
        iris_r = int(size * 0.14)

        # Iris
        draw.ellipse([iris_cx - iris_r, iris_cy - iris_r,
                      iris_cx + iris_r, iris_cy + iris_r], fill=eye_color)

        # Pupil (darker centre)
        pupil_r = iris_r // 2
        draw.ellipse([iris_cx - pupil_r, iris_cy - pupil_r,
                      iris_cx + pupil_r, iris_cy + pupil_r], fill=(5, 5, 5))

        # Specular highlight, offset towards the upper-left of the iris
        spec_r = max(2, iris_r // 4)
        spec_x = iris_cx - iris_r * 0.3
        spec_y = iris_cy - iris_r * 0.3
        draw.ellipse([spec_x - spec_r, spec_y - spec_r,
                      spec_x + spec_r, spec_y + spec_r], fill=(255, 255, 255))

        def upper_lid_y(t):
            # Half-sine bulge above the sclera top; t runs 0..1 across the eye.
            return cy - eye_h // 2 - int(eye_h * 0.2 * math.sin(t * math.pi))

        def lower_lid_y(t):
            # Shallower half-sine bulge below the sclera bottom.
            return cy + eye_h // 2 + int(eye_h * 0.15 * math.sin(t * math.pi))

        # Upper eyelid: skin-coloured polygon from the lid curve up to the top edge.
        lid_pts_upper = [
            (sclera_bbox[0] + (i / 19.0) * eye_w, upper_lid_y(i / 19.0))
            for i in range(20)
        ]
        lid_pts_upper.extend([(sclera_bbox[2], 0), (sclera_bbox[0], 0)])
        draw.polygon(lid_pts_upper, fill=skin_tone)

        # Lower eyelid: skin-coloured polygon from the lid curve down to the bottom edge.
        lid_pts_lower = [
            (sclera_bbox[0] + (i / 19.0) * eye_w, lower_lid_y(i / 19.0))
            for i in range(20)
        ]
        lid_pts_lower.extend([(sclera_bbox[2], size), (sclera_bbox[0], size)])
        draw.polygon(lid_pts_lower, fill=skin_tone)

        # Eyelashes: short jittered strokes every 4 px along the upper lid.
        for i in range(0, eye_w, 4):
            x = sclera_bbox[0] + i
            y_base = upper_lid_y(i / eye_w)
            # Draw order (dx, then length) is kept so seeded output is stable;
            # int() hands Pillow plain Python ints instead of NumPy scalars.
            dx = int(self.rng.randint(-2, 3))
            length = int(self.rng.randint(2, 6))
            draw.line([(x, y_base), (x + dx, y_base - length)],
                      fill=(20, 15, 10), width=1)

        # Slight blur to soften the hard drawing edges
        img = img.filter(ImageFilter.GaussianBlur(radius=0.5))

        return np.array(img, dtype=np.float32)

    def _draw_face(self, skin_tone):
        """
        Draw a simplified face crop (eyebrows, nose line, mouth) on skin colour.

        Args:
            skin_tone: RGB tuple used for the background and the face oval.

        Returns:
            float32 array of shape (img_size, img_size, 3) with values in [0, 255].
        """
        size = self.img_size
        img = Image.new('RGB', (size, size), skin_tone)
        draw = ImageDraw.Draw(img)

        cx, cy = size // 2, size // 2

        # Face oval (same colour as the background, so it adds no visible edge)
        face_w, face_h = int(size * 0.8), int(size * 0.9)
        draw.ellipse([cx - face_w // 2, cy - face_h // 2,
                      cx + face_w // 2, cy + face_h // 2], fill=skin_tone)

        # Eyebrows, drawn in a darker shade of the skin tone
        darker = tuple(max(0, c - 40) for c in skin_tone)
        draw.arc([cx - face_w // 3, cy - face_h // 4, cx - face_w // 10, cy - face_h // 6],
                 180, 360, fill=darker, width=2)
        draw.arc([cx + face_w // 10, cy - face_h // 4, cx + face_w // 3, cy - face_h // 6],
                 180, 360, fill=darker, width=2)

        # Nose hint
        draw.line([(cx, cy - face_h // 8), (cx, cy + face_h // 8)], fill=darker, width=1)

        # Mouth
        draw.arc([cx - face_w // 6, cy + face_h // 6, cx + face_w // 6, cy + face_h // 4],
                 0, 180, fill=(180, 80, 80), width=2)

        img = img.filter(ImageFilter.GaussianBlur(radius=1))
        return np.array(img, dtype=np.float32)

    def _add_glasses(self, eye_img, glasses_color):
        """
        Overlay a circular glasses frame (and sometimes a lens tint) on an eye crop.

        Args:
            eye_img: Eye image array with values in [0, 255].
            glasses_color: RGB tuple for the frame.

        Returns:
            float32 array with the same height/width and values in [0, 255].
        """
        # Clip before the uint8 cast so out-of-range values cannot wrap around.
        img = Image.fromarray(np.clip(eye_img, 0, 255).astype(np.uint8))
        draw = ImageDraw.Draw(img)
        size = self.img_size
        cx, cy = size // 2, size // 2

        # Frame outline (circular lens)
        r = int(size * 0.35)
        frame_width = int(self.rng.randint(2, 5))
        draw.ellipse([cx - r, cy - r, cx + r, cy + r],
                     outline=glasses_color, width=frame_width)

        # Temple arm hint running to the right image edge
        draw.line([(cx + r, cy), (size, cy - 2)], fill=glasses_color, width=frame_width)

        # Subtle bluish lens tint, applied to roughly half of the glasses
        if self.rng.random() > 0.5:
            overlay = Image.new('RGBA', (size, size), (0, 0, 0, 0))
            overlay_draw = ImageDraw.Draw(overlay)
            tint_alpha = int(self.rng.randint(10, 40))
            overlay_draw.ellipse([cx - r + 2, cy - r + 2, cx + r - 2, cy + r - 2],
                                 fill=(200, 200, 255, tint_alpha))
            img = Image.alpha_composite(img.convert('RGBA'), overlay).convert('RGB')

        return np.array(img, dtype=np.float32)

    def _apply_dark_conditions(self, img, darkness_level):
        """
        Simulate low light: dim the image, add noise, and tint it warm or cool.

        Args:
            img: (H, W, 3) image array with values in [0, 255]; not modified.
            darkness_level: Brightness multiplier; smaller means darker and
                noisier.

        Returns:
            New array of the same shape, clipped to [0, 255].
        """
        # Reduce brightness (creates a new array, so the caller's image is untouched)
        img = img * darkness_level

        # Gaussian noise whose strength grows as the scene gets darker.
        # (It is signal-independent, so only an approximation of shot noise.)
        noise_scale = (1.0 - darkness_level) * 15
        noise = self.rng.randn(*img.shape) * noise_scale
        img = img + noise

        # Colour temperature shift from artificial lighting
        if self.rng.random() > 0.5:
            # Warm (yellowish): boost red, cut blue
            img[:, :, 0] *= 1.1
            img[:, :, 2] *= 0.85
        else:
            # Cool (bluish): cut red, boost blue
            img[:, :, 0] *= 0.85
            img[:, :, 2] *= 1.1

        return np.clip(img, 0, 255)

    def _apply_illumination_perturbation(self, img):
        """
        Add a randomly oriented, randomly coloured linear light gradient.

        The gradient is purely additive, so it only brightens the image.

        Args:
            img: (H, W, 3) image array with values in [0, 255]; H and W may
                differ. Not modified.

        Returns:
            New array of the same shape, clipped to [0, 255].
        """
        rows, cols = img.shape[0], img.shape[1]

        # Random gradient direction
        angle = self.rng.random() * 2 * math.pi

        # Normalized pixel coordinates; each axis is scaled by its own length
        # so non-square crops are handled as well.
        grid = np.mgrid[0:rows, 0:cols].astype(np.float32)
        y_coords = grid[0] / rows
        x_coords = grid[1] / cols

        # Project onto the light direction and rescale to [0, 1]
        gradient = (x_coords * math.cos(angle) + y_coords * math.sin(angle))
        gradient = (gradient - gradient.min()) / (gradient.max() - gradient.min() + 1e-8)

        # Random intensity and per-channel colour weight
        intensity = self.rng.uniform(0.1, 0.5)
        color = self.rng.uniform(0.5, 1.5, size=3)

        gradient_rgb = np.stack([gradient * color[i] for i in range(3)], axis=-1)

        img = img + gradient_rgb * 255 * intensity
        return np.clip(img, 0, 255)

    def _apply_sensor_noise(self, img):
        """
        Add read noise, signal-dependent shot noise, and per-column pattern noise.

        Args:
            img: (H, W, C) image array with values in [0, 255]; not modified.

        Returns:
            New array of the same shape, clipped to [0, 255].
        """
        # Gaussian read noise with a random standard deviation in [2, 8)
        read_noise = self.rng.randn(*img.shape) * self.rng.uniform(2, 8)
        # Shot noise: standard deviation grows with sqrt(signal)
        shot_noise = (self.rng.randn(*img.shape) * np.sqrt(np.maximum(img, 0) + 1)
                      * self.rng.uniform(0.1, 0.4))
        # Pattern noise: one offset per (column, channel), broadcast down the
        # rows. It is redrawn on every call, so it is not shared between images.
        fpn = self.rng.randn(1, img.shape[1], img.shape[2]) * self.rng.uniform(1, 3)

        img = img + read_noise + shot_noise + fpn
        return np.clip(img, 0, 255)

    def generate_sample(self, with_glasses_prob=0.25, dark_prob=0.3,
                        lazy_eye_prob=0.15, noise_prob=0.5, illumination_prob=0.5):
        """
        Generate a single training sample.

        Args:
            with_glasses_prob: Probability of overlaying glasses on both eyes.
            dark_prob: Probability of applying low-light conditions to both
                eyes and the face.
            lazy_eye_prob: Probability that one randomly chosen eye deviates
                from the gaze target (strabismus).
            noise_prob: Probability of adding sensor noise to both eyes.
            illumination_prob: Probability of adding a light gradient to both
                eyes.

        Returns:
            Dict with float32 images ``left_eye``, ``right_eye``, ``face``
            (values in [0, 1]) and float32 scalars ``gaze_x``, ``gaze_y``.
        """
        # Random gaze target on screen
        gaze_x = self.rng.uniform(0.05, 0.95)
        gaze_y = self.rng.uniform(0.05, 0.95)

        # Random appearance
        skin_tone = self.skin_tones[self.rng.randint(len(self.skin_tones))]
        eye_color = self.eye_colors[self.rng.randint(len(self.eye_colors))]
        eye_openness = self.rng.uniform(0.6, 1.0)

        # Lazy eye simulation: at most one eye deviates; the label stays the
        # true gaze target.
        lazy_left = (0.0, 0.0)
        lazy_right = (0.0, 0.0)

        if self.rng.random() < lazy_eye_prob:
            affected_eye = self.rng.choice(['left', 'right'])
            deviation = (self.rng.uniform(-0.4, 0.4), self.rng.uniform(-0.15, 0.15))
            if affected_eye == 'left':
                lazy_left = deviation
            else:
                lazy_right = deviation

        # Draw eyes and face
        left_eye = self._draw_eye(gaze_x, gaze_y, skin_tone, eye_color, eye_openness,
                                  lazy_left[0], lazy_left[1])
        right_eye = self._draw_eye(gaze_x, gaze_y, skin_tone, eye_color, eye_openness,
                                   lazy_right[0], lazy_right[1])
        face = self._draw_face(skin_tone)

        # Glasses (same frame colour on both eyes)
        if self.rng.random() < with_glasses_prob:
            glasses_color = self.glasses_colors[self.rng.randint(len(self.glasses_colors))]
            left_eye = self._add_glasses(left_eye, glasses_color)
            right_eye = self._add_glasses(right_eye, glasses_color)

        # Dark conditions (same darkness level on all three crops)
        if self.rng.random() < dark_prob:
            darkness = self.rng.uniform(0.15, 0.5)
            left_eye = self._apply_dark_conditions(left_eye, darkness)
            right_eye = self._apply_dark_conditions(right_eye, darkness)
            face = self._apply_dark_conditions(face, darkness)

        # Illumination perturbation (eyes only). Written as "> 1 - p" so the
        # default p = 0.5 makes the same per-seed decisions as the previous
        # hard-coded "> 0.5" test.
        if self.rng.random() > 1.0 - illumination_prob:
            left_eye = self._apply_illumination_perturbation(left_eye)
            right_eye = self._apply_illumination_perturbation(right_eye)

        # Sensor noise (eyes only)
        if self.rng.random() < noise_prob:
            left_eye = self._apply_sensor_noise(left_eye)
            right_eye = self._apply_sensor_noise(right_eye)

        # Normalize to [0, 1]
        left_eye = left_eye / 255.0
        right_eye = right_eye / 255.0
        face = face / 255.0

        return {
            'left_eye': left_eye.astype(np.float32),
            'right_eye': right_eye.astype(np.float32),
            'face': face.astype(np.float32),
            'gaze_x': np.float32(gaze_x),
            'gaze_y': np.float32(gaze_y),
        }

    def generate_dataset(self, num_samples, with_glasses_prob=0.25, dark_prob=0.3,
                         lazy_eye_prob=0.15, noise_prob=0.5, illumination_prob=0.5):
        """
        Generate ``num_samples`` samples and stack them into arrays.

        Args:
            num_samples: Number of samples to generate (>= 0).
            with_glasses_prob, dark_prob, lazy_eye_prob, noise_prob,
            illumination_prob: Forwarded unchanged to ``generate_sample``.

        Returns:
            Dict of float32 arrays: ``left_eye``, ``right_eye`` and ``face`` of
            shape (num_samples, img_size, img_size, 3), and ``gaze`` of shape
            (num_samples, 2) holding (gaze_x, gaze_y) rows.

        Raises:
            ValueError: If ``num_samples`` is negative.
        """
        if num_samples < 0:
            raise ValueError(f"num_samples must be non-negative, got {num_samples}")

        # Pre-allocate the outputs: avoids holding every sample twice while
        # stacking and keeps the shapes well-defined even for zero samples.
        crop_shape = (num_samples, self.img_size, self.img_size, 3)
        left_eyes = np.empty(crop_shape, dtype=np.float32)
        right_eyes = np.empty(crop_shape, dtype=np.float32)
        faces = np.empty(crop_shape, dtype=np.float32)
        gaze = np.empty((num_samples, 2), dtype=np.float32)

        for i in range(num_samples):
            sample = self.generate_sample(
                with_glasses_prob=with_glasses_prob,
                dark_prob=dark_prob,
                lazy_eye_prob=lazy_eye_prob,
                noise_prob=noise_prob,
                illumination_prob=illumination_prob,
            )
            left_eyes[i] = sample['left_eye']
            right_eyes[i] = sample['right_eye']
            faces[i] = sample['face']
            gaze[i, 0] = sample['gaze_x']
            gaze[i, 1] = sample['gaze_y']

            if (i + 1) % 1000 == 0:
                print(f"Generated {i+1}/{num_samples} samples")

        return {
            'left_eye': left_eyes,
            'right_eye': right_eyes,
            'face': faces,
            'gaze': gaze,
        }


def create_tf_dataset(data_dict, batch_size=64, shuffle=True):
    """
    Convert stacked numpy arrays into a batched ``tf.data.Dataset``.

    Args:
        data_dict: Mapping with array entries ``'left_eye'``, ``'right_eye'``,
            ``'face'`` and ``'gaze'`` that share the same first (sample)
            dimension.
        batch_size: Number of samples per batch.
        shuffle: If true, shuffle samples with a buffer of up to 10000
            elements before batching.

    Returns:
        Dataset yielding ``(inputs, gaze)`` batches, where ``inputs`` is a dict
        with keys ``'left_eye'``, ``'right_eye'`` and ``'face'``.
    """
    features = {key: data_dict[key] for key in ('left_eye', 'right_eye', 'face')}
    labels = data_dict['gaze']
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))

    if shuffle:
        # Cap the buffer at 10000 to bound memory; keep it at least 1 because
        # tf.data does not accept a zero-sized shuffle buffer (empty input).
        buffer_size = max(1, min(len(labels), 10000))
        dataset = dataset.shuffle(buffer_size=buffer_size)

    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)


def create_single_eye_dataset(data_dict, batch_size=64, shuffle=True):
    """
    Build a batched ``tf.data.Dataset`` of individual eye crops.

    Left and right eye crops are stacked along the sample axis, so every input
    sample contributes two training examples (one per eye) that share the same
    gaze label. The crops are neither averaged nor joined side by side.

    Args:
        data_dict: Mapping with array entries ``'left_eye'``, ``'right_eye'``
            and ``'gaze'`` that share the same first (sample) dimension.
        batch_size: Number of eye crops per batch.
        shuffle: If true, shuffle with a buffer of up to 10000 elements before
            batching.

    Returns:
        Dataset yielding ``(eye_crops, gaze)`` batches.
    """
    gaze = data_dict['gaze']

    # All left eyes first, then all right eyes; labels are repeated to match.
    all_eyes = np.concatenate([data_dict['left_eye'], data_dict['right_eye']], axis=0)
    all_gaze = np.concatenate([gaze, gaze], axis=0)

    dataset = tf.data.Dataset.from_tensor_slices((all_eyes, all_gaze))

    if shuffle:
        # Cap the buffer at 10000 to bound memory; keep it at least 1 because
        # tf.data does not accept a zero-sized shuffle buffer (empty input).
        buffer_size = max(1, min(len(all_gaze), 10000))
        dataset = dataset.shuffle(buffer_size=buffer_size)

    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)


if __name__ == '__main__':
    # Smoke test: one sample, a small dataset, and one batch through tf.data.
    print("Testing synthetic data generator...")
    generator = SyntheticGazeDataGenerator(seed=42)

    # Single sample: show which keys it has and what one crop looks like.
    one_sample = generator.generate_sample()
    sample_keys = list(one_sample)
    print(f"Sample keys: {sample_keys}")
    print(f"Left eye shape: {one_sample['left_eye'].shape}")
    print(f"Gaze: ({one_sample['gaze_x']:.3f}, {one_sample['gaze_y']:.3f})")

    # Small stacked dataset: report the shape of every array.
    arrays = generator.generate_dataset(100)
    print("\nDataset shapes:")
    for name, array in arrays.items():
        print(f"  {name}: {array.shape}")

    # Pull a single batch through the tf.data pipeline.
    pipeline = create_tf_dataset(arrays, batch_size=16)
    for batch_inputs, batch_labels in pipeline.take(1):
        print("\nBatch shapes:")
        for name, tensor in batch_inputs.items():
            print(f"  {name}: {tensor.shape}")
        print(f"  labels: {batch_labels.shape}")

    print("\nDone!")