GazeInceptionLite / src /data_generator.py

Upload src/data_generator.py with huggingface_hub

5dccaf9 verified 4 days ago

16.6 kB

	"""
	Synthetic Data Generator for Eye Gaze Training

	Generates realistic synthetic training data that simulates:
	1. Eye crops with various iris positions (gaze directions)
	2. Dark / low-light conditions
	3. Glasses overlays
	4. Lazy eye / strabismus (asymmetric eye gaze)
	5. Various skin tones, eye colors
	6. Sensor noise (CMOS simulation)
	7. Illumination perturbation (directional light gradients)

	Based on augmentation strategies from:
	- AGE framework (arxiv:2603.26945) - GlassesGAN, illumination perturbation, sensor noise
	- UnityEyes approach - synthetic eye rendering with parametric control
	"""

	import numpy as np
	import tensorflow as tf
	from PIL import Image, ImageDraw, ImageFilter
	import random
	import math


	class SyntheticGazeDataGenerator:
	    """
	    Generate synthetic eye and face crops paired with known gaze labels.

	    Each sample produced by ``generate_sample`` contains:
	    - left_eye: (img_size, img_size, 3) float32 RGB crop scaled to [0, 1]
	    - right_eye: (img_size, img_size, 3) float32 RGB crop scaled to [0, 1]
	    - face: (img_size, img_size, 3) float32 RGB crop scaled to [0, 1]
	    - gaze_x, gaze_y: float32 screen coordinates drawn from [0.05, 0.95]

	    All randomness comes from a single seeded ``np.random.RandomState``, so
	    the same seed and the same sequence of calls reproduce the same data.
	    """

	    def __init__(self, img_size=64, seed=42):
	        """
	        Set up the random stream and the appearance palettes.

	        Args:
	            img_size: Side length in pixels of every (square) crop.
	            seed: Seed for the internal ``np.random.RandomState``.
	        """
	        self.img_size = img_size
	        self.rng = np.random.RandomState(seed)

	        # Skin tone palette (RGB), ordered light to dark.
	        self.skin_tones = [
	            (255, 224, 189), (255, 205, 148), (234, 192, 134),
	            (255, 173, 96), (210, 153, 83), (187, 131, 71),
	            (156, 102, 52), (128, 80, 37), (100, 64, 30),
	            (74, 46, 21), (60, 38, 18), (45, 30, 15),
	        ]

	        # Iris colors (RGB).
	        self.eye_colors = [
	            (50, 30, 10),    # Dark brown
	            (100, 60, 20),   # Light brown
	            (40, 80, 40),    # Green
	            (30, 50, 100),   # Blue
	            (50, 50, 50),    # Grey
	            (80, 40, 10),    # Hazel
	            (20, 20, 20),    # Very dark
	        ]

	        # Glasses frame colors (RGB).
	        self.glasses_colors = [
	            (0, 0, 0),        # Black
	            (60, 40, 20),     # Brown
	            (100, 100, 100),  # Silver/grey
	            (0, 0, 60),       # Dark blue
	            (80, 0, 0),       # Dark red
	        ]

	    def _draw_eye(self, gaze_x, gaze_y, skin_tone, eye_color, eye_openness=1.0,
	                  lazy_offset_x=0.0, lazy_offset_y=0.0):
	        """
	        Draw a synthetic eye whose iris position encodes the gaze target.

	        Args:
	            gaze_x, gaze_y: Gaze target; 0.5 centers the iris, 0 and 1 push it
	                to the maximum displacement left/up and right/down.
	            skin_tone: RGB tuple used for the background and the eyelids.
	            eye_color: RGB tuple used for the iris.
	            eye_openness: Multiplier on the sclera height.
	            lazy_offset_x, lazy_offset_y: Extra iris shift, expressed as a
	                fraction of the maximum displacement (strabismus simulation).

	        Returns:
	            float32 array of shape (img_size, img_size, 3), values in 0..255.
	        """
	        size = self.img_size
	        img = Image.new('RGB', (size, size), skin_tone)
	        draw = ImageDraw.Draw(img)

	        cx, cy = size // 2, size // 2

	        # Sclera: a light ellipse centered in the crop.
	        eye_w = int(size * 0.75)
	        eye_h = int(size * 0.35 * eye_openness)
	        sclera_bbox = [cx - eye_w // 2, cy - eye_h // 2,
	                       cx + eye_w // 2, cy + eye_h // 2]
	        draw.ellipse(sclera_bbox, fill=(240, 240, 240), outline=(180, 150, 130))

	        # Map gaze in [0, 1] to an iris displacement inside the eye.
	        # Gaze (0, 0) is the top-left of the screen, (1, 1) the bottom-right.
	        max_disp_x = eye_w * 0.25
	        max_disp_y = eye_h * 0.2

	        iris_offset_x = (gaze_x - 0.5) * 2 * max_disp_x + lazy_offset_x * max_disp_x
	        iris_offset_y = (gaze_y - 0.5) * 2 * max_disp_y + lazy_offset_y * max_disp_y

	        iris_cx = cx + iris_offset_x
	        iris_cy = cy + iris_offset_y
	        iris_r = int(size * 0.14)

	        # Iris
	        draw.ellipse([iris_cx - iris_r, iris_cy - iris_r,
	                      iris_cx + iris_r, iris_cy + iris_r], fill=eye_color)

	        # Pupil (darker center)
	        pupil_r = iris_r // 2
	        draw.ellipse([iris_cx - pupil_r, iris_cy - pupil_r,
	                      iris_cx + pupil_r, iris_cy + pupil_r], fill=(5, 5, 5))

	        # Specular highlight, offset up and to the left of the iris center.
	        spec_r = max(2, iris_r // 4)
	        spec_x = iris_cx - iris_r * 0.3
	        spec_y = iris_cy - iris_r * 0.3
	        draw.ellipse([spec_x - spec_r, spec_y - spec_r,
	                      spec_x + spec_r, spec_y + spec_r], fill=(255, 255, 255))

	        # Upper eyelid: skin-colored polygon whose lower edge is a half-sine
	        # arch, closed along the top border of the crop.
	        lid_pts_upper = []
	        for i in range(20):
	            t = i / 19.0
	            x = sclera_bbox[0] + t * eye_w
	            y = cy - eye_h // 2 - int(eye_h * 0.2 * math.sin(t * math.pi))
	            lid_pts_upper.append((x, y))
	        lid_pts_upper.extend([(sclera_bbox[2], 0), (sclera_bbox[0], 0)])
	        draw.polygon(lid_pts_upper, fill=skin_tone)

	        # Lower eyelid: same idea, closed along the bottom border.
	        lid_pts_lower = []
	        for i in range(20):
	            t = i / 19.0
	            x = sclera_bbox[0] + t * eye_w
	            y = cy + eye_h // 2 + int(eye_h * 0.15 * math.sin(t * math.pi))
	            lid_pts_lower.append((x, y))
	        lid_pts_lower.extend([(sclera_bbox[2], size), (sclera_bbox[0], size)])
	        draw.polygon(lid_pts_lower, fill=skin_tone)

	        # Eyelashes: short dark strokes every 4 px along the upper lid curve.
	        for i in range(0, eye_w, 4):
	            x = sclera_bbox[0] + i
	            t = i / eye_w
	            y_base = cy - eye_h // 2 - int(eye_h * 0.2 * math.sin(t * math.pi))
	            # Horizontal jitter is drawn before the length so the random
	            # stream is consumed in a fixed order.
	            jitter_x = int(self.rng.randint(-2, 3))
	            lash_len = int(self.rng.randint(2, 6))
	            draw.line([(x, y_base), (x + jitter_x, y_base - lash_len)],
	                      fill=(20, 15, 10), width=1)

	        # Slight blur to soften the hard vector edges.
	        img = img.filter(ImageFilter.GaussianBlur(radius=0.5))

	        return np.array(img, dtype=np.float32)

	    def _draw_face(self, skin_tone):
	        """
	        Draw a simplified face crop (eyebrows, nose line, mouth).

	        Args:
	            skin_tone: RGB tuple used for the background and the face oval.

	        Returns:
	            float32 array of shape (img_size, img_size, 3), values in 0..255.
	        """
	        size = self.img_size
	        img = Image.new('RGB', (size, size), skin_tone)
	        draw = ImageDraw.Draw(img)

	        cx, cy = size // 2, size // 2

	        # Face oval
	        face_w, face_h = int(size * 0.8), int(size * 0.9)
	        draw.ellipse([cx - face_w // 2, cy - face_h // 2,
	                      cx + face_w // 2, cy + face_h // 2],
	                     fill=skin_tone)

	        # Eyebrows: arcs in a darker shade of the skin tone.
	        darker = tuple(max(0, c - 40) for c in skin_tone)
	        draw.arc([cx - face_w // 3, cy - face_h // 4,
	                  cx - face_w // 10, cy - face_h // 6],
	                 180, 360, fill=darker, width=2)
	        draw.arc([cx + face_w // 10, cy - face_h // 4,
	                  cx + face_w // 3, cy - face_h // 6],
	                 180, 360, fill=darker, width=2)

	        # Nose hint
	        draw.line([(cx, cy - face_h // 8), (cx, cy + face_h // 8)],
	                  fill=darker, width=1)

	        # Mouth
	        draw.arc([cx - face_w // 6, cy + face_h // 6,
	                  cx + face_w // 6, cy + face_h // 4],
	                 0, 180, fill=(180, 80, 80), width=2)

	        img = img.filter(ImageFilter.GaussianBlur(radius=1))
	        return np.array(img, dtype=np.float32)

	    def _add_glasses(self, eye_img, glasses_color):
	        """
	        Overlay a circular glasses frame (and sometimes a lens tint).

	        Args:
	            eye_img: Array of shape (img_size, img_size, 3) with values in
	                0..255; it is converted to uint8 before drawing.
	            glasses_color: RGB tuple for the frame.

	        Returns:
	            float32 array of the same shape, values in 0..255.
	        """
	        img = Image.fromarray(eye_img.astype(np.uint8))
	        draw = ImageDraw.Draw(img)
	        size = self.img_size
	        cx, cy = size // 2, size // 2

	        # Frame outline (circular lens)
	        r = int(size * 0.35)
	        frame_width = int(self.rng.randint(2, 5))
	        draw.ellipse([cx - r, cy - r, cx + r, cy + r],
	                     outline=glasses_color, width=frame_width)

	        # Temple arm hint running to the right edge of the crop.
	        draw.line([(cx + r, cy), (size, cy - 2)],
	                  fill=glasses_color, width=frame_width)

	        # Subtle bluish lens tint on roughly half of the samples.
	        if self.rng.random() > 0.5:
	            overlay = Image.new('RGBA', (size, size), (0, 0, 0, 0))
	            overlay_draw = ImageDraw.Draw(overlay)
	            tint_alpha = int(self.rng.randint(10, 40))
	            overlay_draw.ellipse([cx - r + 2, cy - r + 2, cx + r - 2, cy + r - 2],
	                                 fill=(200, 200, 255, tint_alpha))
	            img = Image.alpha_composite(img.convert('RGBA'), overlay).convert('RGB')

	        return np.array(img, dtype=np.float32)

	    def _apply_dark_conditions(self, img, darkness_level):
	        """
	        Simulate low light: dim the image, add noise and a color cast.

	        Args:
	            img: (H, W, 3) array with values in 0..255. Not modified.
	            darkness_level: Brightness multiplier; smaller values are darker
	                and receive stronger noise.

	        Returns:
	            New array of the same shape, clipped to [0, 255].
	        """
	        # Reduce brightness (creates a new array, so the caller's image and
	        # the in-place channel scaling below never alias).
	        img = img * darkness_level

	        # Additive Gaussian noise whose scale grows as the scene gets darker.
	        # randn takes the dimensions as separate arguments, hence the unpacking.
	        noise_scale = (1.0 - darkness_level) * 15
	        noise = self.rng.randn(*img.shape) * noise_scale
	        img = img + noise

	        # Color temperature shift (warm/cool tint from artificial lighting).
	        if self.rng.random() > 0.5:
	            # Warm (yellowish - indoor lights)
	            img[:, :, 0] *= 1.1
	            img[:, :, 2] *= 0.85
	        else:
	            # Cool (bluish - screen light)
	            img[:, :, 0] *= 0.85
	            img[:, :, 2] *= 1.1

	        return np.clip(img, 0, 255)

	    def _apply_illumination_perturbation(self, img):
	        """
	        Add a randomly oriented, randomly colored linear light gradient.

	        Args:
	            img: (H, W, 3) array with values in 0..255. Not modified.

	        Returns:
	            New array of the same shape, clipped to [0, 255]. The gradient is
	            non-negative, so pixels only get brighter.
	        """
	        height, width = img.shape[0], img.shape[1]

	        # Random gradient direction
	        angle = self.rng.random() * 2 * math.pi

	        # Normalized pixel coordinates; height and width are handled
	        # separately so non-square inputs work as well.
	        rows, cols = np.mgrid[0:height, 0:width].astype(np.float32)
	        y_coords = rows / height
	        x_coords = cols / width
	        gradient = x_coords * math.cos(angle) + y_coords * math.sin(angle)
	        # Rescale to [0, 1]; the epsilon guards a constant gradient.
	        gradient = (gradient - gradient.min()) / (gradient.max() - gradient.min() + 1e-8)

	        # Random intensity and per-channel color
	        intensity = self.rng.uniform(0.1, 0.5)
	        color = self.rng.uniform(0.5, 1.5, size=3)

	        gradient_rgb = np.stack([gradient * color[i] for i in range(3)], axis=-1)

	        img = img + gradient_rgb * 255 * intensity
	        return np.clip(img, 0, 255)

	    def _apply_sensor_noise(self, img):
	        """
	        Add read noise, signal-dependent shot noise and column pattern noise.

	        Args:
	            img: (H, W, C) array with values in 0..255. Not modified.

	        Returns:
	            New array of the same shape, clipped to [0, 255].
	        """
	        # Gaussian read noise with a random standard deviation.
	        read_noise = self.rng.randn(*img.shape) * self.rng.uniform(2, 8)
	        # Shot noise: amplitude grows with the square root of the signal.
	        shot_noise = (self.rng.randn(*img.shape)
	                      * np.sqrt(np.maximum(img, 0) + 1)
	                      * self.rng.uniform(0.1, 0.4))
	        # Fixed pattern noise: one value per column/channel, shared by all rows.
	        fpn = self.rng.randn(1, img.shape[1], img.shape[2]) * self.rng.uniform(1, 3)

	        img = img + read_noise + shot_noise + fpn
	        return np.clip(img, 0, 255)

	    def generate_sample(self, with_glasses_prob=0.25, dark_prob=0.3,
	                        lazy_eye_prob=0.15, noise_prob=0.5):
	        """
	        Generate a single training sample.

	        Args:
	            with_glasses_prob: Probability of drawing glasses on both eyes.
	            dark_prob: Probability of applying low-light simulation to both
	                eyes and the face.
	            lazy_eye_prob: Probability that one eye deviates from the target.
	            noise_prob: Probability of adding sensor noise to both eyes.

	        Returns:
	            Dict with 'left_eye', 'right_eye', 'face' (float32 images in
	            [0, 1]) and 'gaze_x', 'gaze_y' (np.float32 scalars).
	        """
	        # Random gaze target on screen
	        gaze_x = self.rng.uniform(0.05, 0.95)
	        gaze_y = self.rng.uniform(0.05, 0.95)

	        # Random appearance
	        skin_tone = self.skin_tones[self.rng.randint(len(self.skin_tones))]
	        eye_color = self.eye_colors[self.rng.randint(len(self.eye_colors))]
	        eye_openness = self.rng.uniform(0.6, 1.0)

	        # Lazy eye simulation: one eye deviates from the target while the
	        # label still describes the intended gaze point.
	        lazy_offset_x_L, lazy_offset_y_L = 0.0, 0.0
	        lazy_offset_x_R, lazy_offset_y_R = 0.0, 0.0

	        if self.rng.random() < lazy_eye_prob:
	            affected_eye = self.rng.choice(['left', 'right'])
	            deviation_x = self.rng.uniform(-0.4, 0.4)
	            deviation_y = self.rng.uniform(-0.15, 0.15)
	            if affected_eye == 'left':
	                lazy_offset_x_L = deviation_x
	                lazy_offset_y_L = deviation_y
	            else:
	                lazy_offset_x_R = deviation_x
	                lazy_offset_y_R = deviation_y

	        # Draw eyes and face
	        left_eye = self._draw_eye(gaze_x, gaze_y, skin_tone, eye_color, eye_openness,
	                                  lazy_offset_x_L, lazy_offset_y_L)
	        right_eye = self._draw_eye(gaze_x, gaze_y, skin_tone, eye_color, eye_openness,
	                                   lazy_offset_x_R, lazy_offset_y_R)
	        face = self._draw_face(skin_tone)

	        # Glasses (same frame color on both eyes)
	        if self.rng.random() < with_glasses_prob:
	            glasses_color = self.glasses_colors[self.rng.randint(len(self.glasses_colors))]
	            left_eye = self._add_glasses(left_eye, glasses_color)
	            right_eye = self._add_glasses(right_eye, glasses_color)

	        # Dark conditions (same darkness level on all three crops)
	        if self.rng.random() < dark_prob:
	            darkness = self.rng.uniform(0.15, 0.5)
	            left_eye = self._apply_dark_conditions(left_eye, darkness)
	            right_eye = self._apply_dark_conditions(right_eye, darkness)
	            face = self._apply_dark_conditions(face, darkness)

	        # Illumination perturbation (eyes only, fixed 50% chance)
	        if self.rng.random() > 0.5:
	            left_eye = self._apply_illumination_perturbation(left_eye)
	            right_eye = self._apply_illumination_perturbation(right_eye)

	        # Sensor noise (eyes only)
	        if self.rng.random() < noise_prob:
	            left_eye = self._apply_sensor_noise(left_eye)
	            right_eye = self._apply_sensor_noise(right_eye)

	        # Normalize to [0, 1]
	        left_eye = left_eye / 255.0
	        right_eye = right_eye / 255.0
	        face = face / 255.0

	        return {
	            'left_eye': left_eye.astype(np.float32),
	            'right_eye': right_eye.astype(np.float32),
	            'face': face.astype(np.float32),
	            'gaze_x': np.float32(gaze_x),
	            'gaze_y': np.float32(gaze_y),
	        }

	    def generate_dataset(self, num_samples, with_glasses_prob=0.25, dark_prob=0.3,
	                         lazy_eye_prob=0.15, noise_prob=0.5):
	        """
	        Generate a full dataset as stacked float32 arrays.

	        Args:
	            num_samples: Number of samples; values <= 0 give empty arrays.
	            with_glasses_prob, dark_prob, lazy_eye_prob, noise_prob:
	                Forwarded unchanged to ``generate_sample``.

	        Returns:
	            Dict with 'left_eye', 'right_eye', 'face' of shape
	            (num_samples, img_size, img_size, 3) and 'gaze' of shape
	            (num_samples, 2) holding (gaze_x, gaze_y) per row.
	        """
	        count = max(num_samples, 0)
	        size = self.img_size

	        # Preallocate the outputs: avoids holding every sample twice (list +
	        # final array) and keeps the shapes well-defined for count == 0.
	        left_eyes = np.empty((count, size, size, 3), dtype=np.float32)
	        right_eyes = np.empty_like(left_eyes)
	        faces = np.empty_like(left_eyes)
	        gaze = np.empty((count, 2), dtype=np.float32)

	        for i in range(count):
	            sample = self.generate_sample(
	                with_glasses_prob=with_glasses_prob,
	                dark_prob=dark_prob,
	                lazy_eye_prob=lazy_eye_prob,
	                noise_prob=noise_prob,
	            )
	            left_eyes[i] = sample['left_eye']
	            right_eyes[i] = sample['right_eye']
	            faces[i] = sample['face']
	            gaze[i, 0] = sample['gaze_x']
	            gaze[i, 1] = sample['gaze_y']

	            if (i + 1) % 1000 == 0:
	                print(f"Generated {i+1}/{num_samples} samples")

	        return {
	            'left_eye': left_eyes,
	            'right_eye': right_eyes,
	            'face': faces,
	            'gaze': gaze,
	        }


	def create_tf_dataset(data_dict, batch_size=64, shuffle=True):
	    """
	    Build a batched ``tf.data.Dataset`` from the generator's numpy arrays.

	    Args:
	        data_dict: Mapping with 'left_eye', 'right_eye', 'face' and 'gaze'
	            arrays that share the same leading (sample) dimension.
	        batch_size: Number of samples per batch.
	        shuffle: When true, shuffle with a buffer of at most 10000 samples.

	    Returns:
	        Dataset yielding ``(inputs, gaze)`` pairs, where ``inputs`` is a dict
	        keyed by 'left_eye', 'right_eye' and 'face'.
	    """
	    inputs = {
	        key: data_dict[key] for key in ('left_eye', 'right_eye', 'face')
	    }
	    targets = data_dict['gaze']
	    pipeline = tf.data.Dataset.from_tensor_slices((inputs, targets))

	    if shuffle:
	        # Cap the buffer so very large datasets do not need a full-size buffer.
	        buffer = min(len(data_dict['gaze']), 10000)
	        pipeline = pipeline.shuffle(buffer_size=buffer)

	    batched = pipeline.batch(batch_size)
	    return batched.prefetch(tf.data.AUTOTUNE)


	def create_single_eye_dataset(data_dict, batch_size=64, shuffle=True):
	    """
	    Build a dataset of individual eye crops for a single-eye model.

	    Left and right crops are stacked along the sample axis (all left eyes
	    first, then all right eyes), each paired with the gaze label of the
	    sample it came from, so the result has twice as many examples as
	    ``data_dict``. The crops are not averaged or joined side by side.

	    Args:
	        data_dict: Mapping with 'left_eye', 'right_eye' and 'gaze' arrays
	            that share the same leading (sample) dimension.
	        batch_size: Number of samples per batch.
	        shuffle: When true, shuffle with a buffer of at most 10000 samples.

	    Returns:
	        ``tf.data.Dataset`` yielding ``(eye_crop, gaze)`` batches.
	    """
	    eye_crops = np.concatenate(
	        [data_dict['left_eye'], data_dict['right_eye']], axis=0
	    )
	    labels = data_dict['gaze']
	    # Repeat the labels once so they line up with the stacked crops.
	    eye_labels = np.concatenate([labels, labels], axis=0)

	    pipeline = tf.data.Dataset.from_tensor_slices((eye_crops, eye_labels))

	    if shuffle:
	        buffer = min(len(eye_labels), 10000)
	        pipeline = pipeline.shuffle(buffer_size=buffer)

	    batched = pipeline.batch(batch_size)
	    return batched.prefetch(tf.data.AUTOTUNE)


	if __name__ == '__main__':
	    # Smoke test: one sample, a small dataset, then one tf.data batch.
	    print("Testing synthetic data generator...")
	    generator = SyntheticGazeDataGenerator(seed=42)

	    # Single sample
	    sample = generator.generate_sample()
	    print(f"Sample keys: {list(sample.keys())}")
	    print(f"Left eye shape: {sample['left_eye'].shape}")
	    print(f"Gaze: ({sample['gaze_x']:.3f}, {sample['gaze_y']:.3f})")

	    # Small dataset
	    arrays = generator.generate_dataset(100)
	    print(f"\nDataset shapes:")
	    for k, v in arrays.items():
	        print(f" {k}: {v.shape}")

	    # tf.data pipeline: inspect the first batch only
	    batches = create_tf_dataset(arrays, batch_size=16)
	    for inputs, labels in batches.take(1):
	        print(f"\nBatch shapes:")
	        for k, v in inputs.items():
	            print(f" {k}: {v.shape}")
	        print(f" labels: {labels.shape}")

	    print("\nDone!")