# Source: GazeInceptionLite / src/data_generator.py
# Uploaded by BcantCode with huggingface_hub (commit 5dccaf9, verified).
"""
Synthetic Data Generator for Eye Gaze Training
Generates realistic synthetic training data that simulates:
1. Eye crops with various iris positions (gaze directions)
2. Dark / low-light conditions
3. Glasses overlays
4. Lazy eye / strabismus (asymmetric eye gaze)
5. Various skin tones, eye colors
6. Sensor noise (CMOS simulation)
7. Illumination perturbation (directional light gradients)
Based on augmentation strategies from:
- AGE framework (arxiv:2603.26945) - GlassesGAN, illumination perturbation, sensor noise
- UnityEyes approach - synthetic eye rendering with parametric control
"""
import numpy as np
import tensorflow as tf
from PIL import Image, ImageDraw, ImageFilter
import random
import math
class SyntheticGazeDataGenerator:
    """
    Generates synthetic eye + face images with known gaze labels.

    Each sample contains:
    - left_eye: img_size x img_size RGB crop, float32 in [0, 1]
    - right_eye: img_size x img_size RGB crop, float32 in [0, 1]
    - face: img_size x img_size RGB crop, float32 in [0, 1]
    - gaze_x, gaze_y: normalized screen coordinates, drawn from [0.05, 0.95]

    All randomness is drawn from a single seeded ``np.random.RandomState``,
    so the same seed and the same sequence of calls reproduce the same data.
    """

    def __init__(self, img_size=64, seed=42):
        """Store the crop size, seed the RNG and set up the colour palettes.

        Args:
            img_size: Side length in pixels of every generated crop.
            seed: Seed for the generator's private ``RandomState``.
        """
        self.img_size = img_size
        self.rng = np.random.RandomState(seed)
        # Skin tone palette (RGB) - diverse range
        self.skin_tones = [
            (255, 224, 189), (255, 205, 148), (234, 192, 134),
            (255, 173, 96), (210, 153, 83), (187, 131, 71),
            (156, 102, 52), (128, 80, 37), (100, 64, 30),
            (74, 46, 21), (60, 38, 18), (45, 30, 15),
        ]
        # Eye colors (iris RGB)
        self.eye_colors = [
            (50, 30, 10),    # Dark brown
            (100, 60, 20),   # Light brown
            (40, 80, 40),    # Green
            (30, 50, 100),   # Blue
            (50, 50, 50),    # Grey
            (80, 40, 10),    # Hazel
            (20, 20, 20),    # Very dark (common in Asian/African eyes)
        ]
        # Glasses frame colors
        self.glasses_colors = [
            (0, 0, 0),        # Black
            (60, 40, 20),     # Brown
            (100, 100, 100),  # Silver/grey
            (0, 0, 60),       # Dark blue
            (80, 0, 0),       # Dark red
        ]

    def _draw_eye(self, gaze_x, gaze_y, skin_tone, eye_color, eye_openness=1.0,
                  lazy_offset_x=0.0, lazy_offset_y=0.0):
        """Draw a synthetic eye with iris at position determined by gaze.

        Args:
            gaze_x, gaze_y: Gaze target in [0, 1]; 0.5 centres the iris.
            skin_tone: RGB tuple used for the background and the eyelids.
            eye_color: RGB tuple used for the iris.
            eye_openness: Scale factor applied to the sclera height.
            lazy_offset_x, lazy_offset_y: Extra iris displacement, expressed
                as a fraction of the maximum displacement on each axis.

        Returns:
            float32 array of shape (img_size, img_size, 3), values in [0, 255].
        """
        size = self.img_size
        img = Image.new('RGB', (size, size), skin_tone)
        draw = ImageDraw.Draw(img)
        cx, cy = size // 2, size // 2

        # Eye white (sclera) - elliptical shape
        eye_w = int(size * 0.75)
        eye_h = int(size * 0.35 * eye_openness)
        sclera_bbox = [cx - eye_w // 2, cy - eye_h // 2,
                       cx + eye_w // 2, cy + eye_h // 2]
        draw.ellipse(sclera_bbox, fill=(240, 240, 240), outline=(180, 150, 130))

        # Iris position: map gaze (0-1) to iris displacement within eye.
        # Gaze (0,0) = top-left of screen, (1,1) = bottom-right, so a smaller
        # gaze_x moves the iris towards the left edge of the crop.
        max_disp_x = eye_w * 0.25
        max_disp_y = eye_h * 0.2
        iris_offset_x = (gaze_x - 0.5) * 2 * max_disp_x + lazy_offset_x * max_disp_x
        iris_offset_y = (gaze_y - 0.5) * 2 * max_disp_y + lazy_offset_y * max_disp_y
        iris_cx = cx + iris_offset_x
        iris_cy = cy + iris_offset_y
        iris_r = int(size * 0.14)

        # Draw iris
        draw.ellipse([iris_cx - iris_r, iris_cy - iris_r,
                      iris_cx + iris_r, iris_cy + iris_r], fill=eye_color)

        # Draw pupil (darker center)
        pupil_r = iris_r // 2
        draw.ellipse([iris_cx - pupil_r, iris_cy - pupil_r,
                      iris_cx + pupil_r, iris_cy + pupil_r], fill=(5, 5, 5))

        # Specular highlight (light reflection), offset up-left of the iris centre
        spec_r = max(2, iris_r // 4)
        spec_x = iris_cx - iris_r * 0.3
        spec_y = iris_cy - iris_r * 0.3
        draw.ellipse([spec_x - spec_r, spec_y - spec_r,
                      spec_x + spec_r, spec_y + spec_r], fill=(255, 255, 255))

        # Upper eyelid: skin-coloured polygon from the lid curve up to the top
        # edge, painted over whatever part of the iris it covers.
        lid_pts_upper = []
        for i in range(20):
            t = i / 19.0
            x = sclera_bbox[0] + t * eye_w
            # Sine-arch eyelid shape
            y = cy - eye_h // 2 - int(eye_h * 0.2 * math.sin(t * math.pi))
            lid_pts_upper.append((x, y))
        lid_pts_upper.extend([(sclera_bbox[2], 0), (sclera_bbox[0], 0)])
        draw.polygon(lid_pts_upper, fill=skin_tone)

        # Lower eyelid: same idea, closed against the bottom edge.
        lid_pts_lower = []
        for i in range(20):
            t = i / 19.0
            x = sclera_bbox[0] + t * eye_w
            y = cy + eye_h // 2 + int(eye_h * 0.15 * math.sin(t * math.pi))
            lid_pts_lower.append((x, y))
        lid_pts_lower.extend([(sclera_bbox[2], size), (sclera_bbox[0], size)])
        draw.polygon(lid_pts_lower, fill=skin_tone)

        # Eyelashes (thin lines) every 4 px along the upper lid curve.
        for i in range(0, eye_w, 4):
            x = sclera_bbox[0] + i
            t = i / eye_w
            y_base = cy - eye_h // 2 - int(eye_h * 0.2 * math.sin(t * math.pi))
            # Horizontal jitter is drawn before the lash length; keep this
            # order so seeded output stays reproducible.
            draw.line([(x, y_base),
                       (x + self.rng.randint(-2, 3), y_base - self.rng.randint(2, 6))],
                      fill=(20, 15, 10), width=1)

        # Add slight blur for realism
        img = img.filter(ImageFilter.GaussianBlur(radius=0.5))
        return np.array(img, dtype=np.float32)

    def _draw_face(self, skin_tone):
        """Draw a simplified face crop (head pose context).

        Args:
            skin_tone: RGB tuple used for the background and the face oval.

        Returns:
            float32 array of shape (img_size, img_size, 3), values in [0, 255].
        """
        size = self.img_size
        img = Image.new('RGB', (size, size), skin_tone)
        draw = ImageDraw.Draw(img)
        cx, cy = size // 2, size // 2

        # Face oval
        face_w, face_h = int(size * 0.8), int(size * 0.9)
        draw.ellipse([cx - face_w // 2, cy - face_h // 2,
                      cx + face_w // 2, cy + face_h // 2],
                     fill=skin_tone)

        # Eyebrow regions (darker shade of the skin tone, floored at 0)
        darker = tuple(max(0, c - 40) for c in skin_tone)
        draw.arc([cx - face_w // 3, cy - face_h // 4, cx - face_w // 10, cy - face_h // 6],
                 180, 360, fill=darker, width=2)
        draw.arc([cx + face_w // 10, cy - face_h // 4, cx + face_w // 3, cy - face_h // 6],
                 180, 360, fill=darker, width=2)

        # Nose hint
        draw.line([(cx, cy - face_h // 8), (cx, cy + face_h // 8)], fill=darker, width=1)

        # Mouth
        draw.arc([cx - face_w // 6, cy + face_h // 6, cx + face_w // 6, cy + face_h // 4],
                 0, 180, fill=(180, 80, 80), width=2)

        img = img.filter(ImageFilter.GaussianBlur(radius=1))
        return np.array(img, dtype=np.float32)

    def _add_glasses(self, eye_img, glasses_color):
        """Overlay glasses frame on eye image.

        Args:
            eye_img: Eye crop with values on the 0-255 scale.
            glasses_color: RGB tuple for the frame and temple arm.

        Returns:
            float32 array with the same height/width, values in [0, 255].
        """
        # Clip before the uint8 cast so out-of-range values saturate instead
        # of wrapping around.
        img = Image.fromarray(np.clip(eye_img, 0, 255).astype(np.uint8))
        draw = ImageDraw.Draw(img)
        size = self.img_size
        cx, cy = size // 2, size // 2

        # Frame outline (circular lens)
        r = int(size * 0.35)
        frame_width = int(self.rng.randint(2, 5))
        draw.ellipse([cx - r, cy - r, cx + r, cy + r],
                     outline=glasses_color, width=frame_width)

        # Temple arm hint
        draw.line([(cx + r, cy), (size, cy - 2)], fill=glasses_color, width=frame_width)

        # Lens tint/reflection (subtle), applied to roughly half of the calls
        if self.rng.random() > 0.5:
            overlay = Image.new('RGBA', (size, size), (0, 0, 0, 0))
            overlay_draw = ImageDraw.Draw(overlay)
            tint_alpha = int(self.rng.randint(10, 40))
            overlay_draw.ellipse([cx - r + 2, cy - r + 2, cx + r - 2, cy + r - 2],
                                 fill=(200, 200, 255, tint_alpha))
            img = Image.alpha_composite(img.convert('RGBA'), overlay).convert('RGB')

        return np.array(img, dtype=np.float32)

    def _apply_dark_conditions(self, img, darkness_level):
        """Simulate dark/low-light conditions with noise.

        Args:
            img: Float image array of shape (H, W, 3) on the 0-255 scale.
                The input array is not modified.
            darkness_level: Brightness multiplier; lower values are darker
                and receive stronger noise.

        Returns:
            New array of the same shape, clipped to [0, 255].
        """
        # Reduce brightness (creates a new array, leaving the caller's intact)
        img = img * darkness_level

        # Add Gaussian noise whose scale grows as the image gets darker
        noise_scale = (1.0 - darkness_level) * 15
        noise = self.rng.randn(*img.shape) * noise_scale
        img = img + noise

        # Color temperature shift (warm/cool tint from artificial lighting)
        if self.rng.random() > 0.5:
            # Warm (yellowish - indoor lights)
            img[:, :, 0] *= 1.1
            img[:, :, 2] *= 0.85
        else:
            # Cool (bluish - screen light)
            img[:, :, 0] *= 0.85
            img[:, :, 2] *= 1.1

        return np.clip(img, 0, 255)

    def _apply_illumination_perturbation(self, img):
        """Apply directional light gradient (from AGE framework).

        A linear ramp in a random direction, normalised to [0, 1], is tinted
        with a random per-channel colour and added to the image.

        Args:
            img: Float image array of shape (H, W, 3) on the 0-255 scale.
                H and W may differ.

        Returns:
            New array of the same shape, clipped to [0, 255].
        """
        height, width = img.shape[:2]

        # Random gradient direction
        angle = self.rng.random() * 2 * math.pi

        # Create gradient; each axis is normalised by its own length so that
        # non-square crops are supported.
        grid = np.mgrid[0:height, 0:width].astype(np.float32)
        y_coords = grid[0] / height
        x_coords = grid[1] / width
        gradient = (x_coords * math.cos(angle) + y_coords * math.sin(angle))
        # The epsilon guards against a zero range (e.g. a 1x1 image).
        gradient = (gradient - gradient.min()) / (gradient.max() - gradient.min() + 1e-8)

        # Random intensity and color
        intensity = self.rng.uniform(0.1, 0.5)
        color = self.rng.uniform(0.5, 1.5, size=3)
        gradient_rgb = gradient[..., np.newaxis] * color

        img = img + gradient_rgb * 255 * intensity
        return np.clip(img, 0, 255)

    def _apply_sensor_noise(self, img):
        """Simulate CMOS sensor noise (from AGE framework).

        Args:
            img: Float image array of shape (H, W, C) on the 0-255 scale.

        Returns:
            New array of the same shape, clipped to [0, 255].
        """
        # Gaussian read noise
        read_noise = self.rng.randn(*img.shape) * self.rng.uniform(2, 8)
        # Shot noise (signal-dependent: scales with sqrt of the pixel value)
        shot_noise = (self.rng.randn(*img.shape)
                      * np.sqrt(np.maximum(img, 0) + 1)
                      * self.rng.uniform(0.1, 0.4))
        # Fixed pattern noise: one value per column/channel, broadcast over rows
        fpn = self.rng.randn(1, img.shape[1], img.shape[2]) * self.rng.uniform(1, 3)

        img = img + read_noise + shot_noise + fpn
        return np.clip(img, 0, 255)

    def generate_sample(self, with_glasses_prob=0.25, dark_prob=0.3,
                        lazy_eye_prob=0.15, noise_prob=0.5):
        """Generate a single training sample.

        Args:
            with_glasses_prob: Probability of drawing glasses on both eyes.
            dark_prob: Probability of applying low-light simulation to both
                eyes and the face.
            lazy_eye_prob: Probability that one eye deviates from the target.
            noise_prob: Probability of adding sensor noise to both eyes.

        Returns:
            Dict with 'left_eye', 'right_eye' and 'face' (float32 arrays in
            [0, 1]) and 'gaze_x', 'gaze_y' (np.float32 scalars).
        """
        # Random gaze target on screen
        gaze_x = self.rng.uniform(0.05, 0.95)
        gaze_y = self.rng.uniform(0.05, 0.95)

        # Random appearance
        skin_tone = self.skin_tones[self.rng.randint(len(self.skin_tones))]
        eye_color = self.eye_colors[self.rng.randint(len(self.eye_colors))]
        eye_openness = self.rng.uniform(0.6, 1.0)

        # Lazy eye simulation: one eye deviates from the target while the
        # label stays at the intended gaze point.
        lazy_offset_x_L, lazy_offset_y_L = 0.0, 0.0
        lazy_offset_x_R, lazy_offset_y_R = 0.0, 0.0
        if self.rng.random() < lazy_eye_prob:
            # Strabismus: one eye deviates
            affected_eye = self.rng.choice(['left', 'right'])
            deviation_x = self.rng.uniform(-0.4, 0.4)
            deviation_y = self.rng.uniform(-0.15, 0.15)
            if affected_eye == 'left':
                lazy_offset_x_L, lazy_offset_y_L = deviation_x, deviation_y
            else:
                lazy_offset_x_R, lazy_offset_y_R = deviation_x, deviation_y

        # Draw eyes
        left_eye = self._draw_eye(gaze_x, gaze_y, skin_tone, eye_color, eye_openness,
                                  lazy_offset_x_L, lazy_offset_y_L)
        right_eye = self._draw_eye(gaze_x, gaze_y, skin_tone, eye_color, eye_openness,
                                   lazy_offset_x_R, lazy_offset_y_R)
        face = self._draw_face(skin_tone)

        # Apply glasses (same frame colour on both eyes)
        if self.rng.random() < with_glasses_prob:
            glasses_color = self.glasses_colors[self.rng.randint(len(self.glasses_colors))]
            left_eye = self._add_glasses(left_eye, glasses_color)
            right_eye = self._add_glasses(right_eye, glasses_color)

        # Apply dark conditions (same darkness level for all three crops)
        if self.rng.random() < dark_prob:
            darkness = self.rng.uniform(0.15, 0.5)
            left_eye = self._apply_dark_conditions(left_eye, darkness)
            right_eye = self._apply_dark_conditions(right_eye, darkness)
            face = self._apply_dark_conditions(face, darkness)

        # Illumination perturbation (eyes only, fixed 50% chance)
        if self.rng.random() > 0.5:
            left_eye = self._apply_illumination_perturbation(left_eye)
            right_eye = self._apply_illumination_perturbation(right_eye)

        # Sensor noise (eyes only)
        if self.rng.random() < noise_prob:
            left_eye = self._apply_sensor_noise(left_eye)
            right_eye = self._apply_sensor_noise(right_eye)

        # Normalize to [0, 1]
        left_eye = left_eye / 255.0
        right_eye = right_eye / 255.0
        face = face / 255.0

        return {
            'left_eye': left_eye.astype(np.float32),
            'right_eye': right_eye.astype(np.float32),
            'face': face.astype(np.float32),
            'gaze_x': np.float32(gaze_x),
            'gaze_y': np.float32(gaze_y),
        }

    def generate_dataset(self, num_samples, with_glasses_prob=0.25, dark_prob=0.3,
                         lazy_eye_prob=0.15, noise_prob=0.5):
        """Generate a full dataset.

        Args:
            num_samples: Number of samples to generate; values below zero are
                treated as zero.
            with_glasses_prob, dark_prob, lazy_eye_prob, noise_prob: Forwarded
                unchanged to ``generate_sample`` for every sample.

        Returns:
            Dict with 'left_eye', 'right_eye' and 'face' as float32 arrays of
            shape (num_samples, img_size, img_size, 3) and 'gaze' as a float32
            array of shape (num_samples, 2) holding (gaze_x, gaze_y) rows.
        """
        count = max(num_samples, 0)
        image_shape = (count, self.img_size, self.img_size, 3)

        # Preallocate the outputs: this keeps well-formed shapes for an empty
        # dataset and avoids holding a list copy next to the final arrays.
        left_eyes = np.empty(image_shape, dtype=np.float32)
        right_eyes = np.empty(image_shape, dtype=np.float32)
        faces = np.empty(image_shape, dtype=np.float32)
        gaze = np.empty((count, 2), dtype=np.float32)

        for i in range(count):
            sample = self.generate_sample(
                with_glasses_prob=with_glasses_prob,
                dark_prob=dark_prob,
                lazy_eye_prob=lazy_eye_prob,
                noise_prob=noise_prob,
            )
            left_eyes[i] = sample['left_eye']
            right_eyes[i] = sample['right_eye']
            faces[i] = sample['face']
            gaze[i, 0] = sample['gaze_x']
            gaze[i, 1] = sample['gaze_y']

            if (i + 1) % 1000 == 0:
                print(f"Generated {i+1}/{num_samples} samples")

        return {
            'left_eye': left_eyes,
            'right_eye': right_eyes,
            'face': faces,
            'gaze': gaze,
        }
def create_tf_dataset(data_dict, batch_size=64, shuffle=True):
    """Wrap generated numpy arrays in a batched, prefetched tf.data.Dataset.

    Each element is an ``(inputs, gaze)`` pair: ``inputs`` is a dict holding
    the 'left_eye', 'right_eye' and 'face' slices, and ``gaze`` is the
    matching row of ``data_dict['gaze']``. When ``shuffle`` is true, a shuffle
    buffer of at most 10000 elements is applied before batching.
    """
    input_keys = ('left_eye', 'right_eye', 'face')
    features = {key: data_dict[key] for key in input_keys}
    targets = data_dict['gaze']

    pipeline = tf.data.Dataset.from_tensor_slices((features, targets))
    if shuffle:
        # Cap the buffer so very large datasets do not need a full-size buffer.
        window = min(len(targets), 10000)
        pipeline = pipeline.shuffle(buffer_size=window)

    pipeline = pipeline.batch(batch_size)
    return pipeline.prefetch(tf.data.AUTOTUNE)
def create_single_eye_dataset(data_dict, batch_size=64, shuffle=True):
    """Create a dataset of individual eye crops for a single-eye model.

    Every left-eye and every right-eye crop becomes its own training example,
    labelled with the gaze of the sample it came from, so the dataset has
    twice as many examples as ``data_dict``. The crops are neither averaged
    nor joined side by side, and the 'face' entry is not used.

    Args:
        data_dict: Mapping with 'left_eye', 'right_eye' and 'gaze' arrays that
            share their first dimension.
        batch_size: Number of examples per batch.
        shuffle: Whether to shuffle (window of at most 10000 examples).

    Returns:
        A batched, prefetched ``tf.data.Dataset`` of ``(eye, gaze)`` pairs.
        With ``shuffle=False`` the order is all left eyes followed by all
        right eyes.
    """
    left_eyes = data_dict['left_eye']
    right_eyes = data_dict['right_eye']
    gaze = data_dict['gaze']

    if shuffle:
        # The shuffle window is capped at 10000 elements. Over a plain
        # left-then-right concatenation, a large dataset would emit only left
        # eyes until the window reaches the second half. Interleaving
        # (L0, R0, L1, R1, ...) lets any window contain both eyes.
        crop_shape = tuple(np.shape(left_eyes)[1:])
        all_eyes = np.stack([left_eyes, right_eyes], axis=1).reshape((-1,) + crop_shape)
        all_gaze = np.repeat(gaze, 2, axis=0)
    else:
        # Keep the established deterministic order for unshuffled use.
        all_eyes = np.concatenate([left_eyes, right_eyes], axis=0)
        all_gaze = np.concatenate([gaze, gaze], axis=0)

    dataset = tf.data.Dataset.from_tensor_slices((all_eyes, all_gaze))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=min(len(all_gaze), 10000))
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset
if __name__ == '__main__':
    # Smoke test: build one sample, a small dataset and one tf.data batch,
    # printing the shapes of everything produced.
    print("Testing synthetic data generator...")
    generator = SyntheticGazeDataGenerator(seed=42)

    # Single sample
    one_sample = generator.generate_sample()
    print(f"Sample keys: {list(one_sample)}")
    print(f"Left eye shape: {one_sample['left_eye'].shape}")
    print(f"Gaze: ({one_sample['gaze_x']:.3f}, {one_sample['gaze_y']:.3f})")

    # Small dataset
    dataset_arrays = generator.generate_dataset(100)
    print("\nDataset shapes:")
    for name, array in dataset_arrays.items():
        print(f" {name}: {array.shape}")

    # One batch through the tf.data pipeline
    batched = create_tf_dataset(dataset_arrays, batch_size=16)
    for batch_inputs, batch_labels in batched.take(1):
        print("\nBatch shapes:")
        for name, tensor in batch_inputs.items():
            print(f" {name}: {tensor.shape}")
        print(f" labels: {batch_labels.shape}")

    print("\nDone!")