BcantCode committed on
Commit
5dccaf9
·
verified ·
1 Parent(s): 687b215

Upload src/data_generator.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. src/data_generator.py +425 -0
src/data_generator.py ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Synthetic Data Generator for Eye Gaze Training
3
+
4
+ Generates realistic synthetic training data that simulates:
5
+ 1. Eye crops with various iris positions (gaze directions)
6
+ 2. Dark / low-light conditions
7
+ 3. Glasses overlays
8
+ 4. Lazy eye / strabismus (asymmetric eye gaze)
9
+ 5. Various skin tones, eye colors
10
+ 6. Sensor noise (CMOS simulation)
11
+ 7. Illumination perturbation (directional light gradients)
12
+
13
+ Based on augmentation strategies from:
14
+ - AGE framework (arxiv:2603.26945) - GlassesGAN, illumination perturbation, sensor noise
15
+ - UnityEyes approach - synthetic eye rendering with parametric control
16
+ """
17
+
18
+ import numpy as np
19
+ import tensorflow as tf
20
+ from PIL import Image, ImageDraw, ImageFilter
21
+ import random
22
+ import math
23
+
24
+
25
class SyntheticGazeDataGenerator:
    """
    Generates synthetic eye + face images with known gaze labels.

    Each sample contains:
    - left_eye: img_size x img_size RGB crop (64x64 by default)
    - right_eye: img_size x img_size RGB crop
    - face: img_size x img_size RGB crop
    - gaze_x, gaze_y: normalized screen coordinates, drawn from [0.05, 0.95]

    All randomness is drawn from a single ``np.random.RandomState`` created
    in ``__init__``, so the same seed and the same sequence of calls
    reproduce the same data.
    """

    def __init__(self, img_size=64, seed=42):
        """
        Set up the random state and the appearance palettes.

        Args:
            img_size: Side length in pixels of every generated square crop.
            seed: Seed for the internal ``np.random.RandomState``.
        """
        self.img_size = img_size
        self.rng = np.random.RandomState(seed)

        # Skin tone palette (RGB) - diverse range
        self.skin_tones = [
            (255, 224, 189), (255, 205, 148), (234, 192, 134),
            (255, 173, 96), (210, 153, 83), (187, 131, 71),
            (156, 102, 52), (128, 80, 37), (100, 64, 30),
            (74, 46, 21), (60, 38, 18), (45, 30, 15),
        ]

        # Eye colors (iris RGB)
        self.eye_colors = [
            (50, 30, 10),    # Dark brown
            (100, 60, 20),   # Light brown
            (40, 80, 40),    # Green
            (30, 50, 100),   # Blue
            (50, 50, 50),    # Grey
            (80, 40, 10),    # Hazel
            (20, 20, 20),    # Very dark (common in Asian/African eyes)
        ]

        # Glasses frame colors
        self.glasses_colors = [
            (0, 0, 0),        # Black
            (60, 40, 20),     # Brown
            (100, 100, 100),  # Silver/grey
            (0, 0, 60),       # Dark blue
            (80, 0, 0),       # Dark red
        ]

    def _draw_eye(self, gaze_x, gaze_y, skin_tone, eye_color, eye_openness=1.0,
                  lazy_offset_x=0.0, lazy_offset_y=0.0):
        """
        Draw a synthetic eye with the iris placed according to the gaze.

        Args:
            gaze_x, gaze_y: Gaze target; 0.5 centers the iris, and the
                iris shifts linearly as the value moves away from 0.5.
            skin_tone: RGB tuple used for the background and the eyelids.
            eye_color: RGB tuple used for the iris.
            eye_openness: Scales the sclera height (1.0 = fully open).
            lazy_offset_x, lazy_offset_y: Extra iris displacement, in units
                of the maximum displacement, used to simulate strabismus.

        Returns:
            float32 array of shape (img_size, img_size, 3) with values in
            the 0-255 range.
        """
        size = self.img_size
        img = Image.new('RGB', (size, size), skin_tone)
        draw = ImageDraw.Draw(img)

        cx, cy = size // 2, size // 2

        # Eye white (sclera) - elliptical shape
        eye_w = int(size * 0.75)
        eye_h = int(size * 0.35 * eye_openness)
        sclera_bbox = [cx - eye_w // 2, cy - eye_h // 2,
                       cx + eye_w // 2, cy + eye_h // 2]
        draw.ellipse(sclera_bbox, fill=(240, 240, 240), outline=(180, 150, 130))

        # Iris position: map gaze (0-1) to iris displacement within eye.
        # Gaze (0,0) = top-left of screen, (1,1) = bottom-right.
        # When looking left on screen, iris moves left relative to eye.
        max_disp_x = eye_w * 0.25
        max_disp_y = eye_h * 0.2

        iris_offset_x = (gaze_x - 0.5) * 2 * max_disp_x + lazy_offset_x * max_disp_x
        iris_offset_y = (gaze_y - 0.5) * 2 * max_disp_y + lazy_offset_y * max_disp_y

        iris_cx = cx + iris_offset_x
        iris_cy = cy + iris_offset_y
        iris_r = int(size * 0.14)

        # Draw iris
        draw.ellipse([iris_cx - iris_r, iris_cy - iris_r,
                      iris_cx + iris_r, iris_cy + iris_r], fill=eye_color)

        # Draw pupil (darker center)
        pupil_r = iris_r // 2
        draw.ellipse([iris_cx - pupil_r, iris_cy - pupil_r,
                      iris_cx + pupil_r, iris_cy + pupil_r], fill=(5, 5, 5))

        # Specular highlight (light reflection), offset up-left of the iris center
        spec_r = max(2, iris_r // 4)
        spec_x = iris_cx - iris_r * 0.3
        spec_y = iris_cy - iris_r * 0.3
        draw.ellipse([spec_x - spec_r, spec_y - spec_r,
                      spec_x + spec_r, spec_y + spec_r], fill=(255, 255, 255))

        # Upper eyelid: skin-colored polygon bounded below by a half-sine
        # arc and closed along the top edge of the image, so it hides the
        # part of the iris/sclera that lies above the lid line.
        lid_pts_upper = []
        for i in range(20):
            t = i / 19.0
            x = sclera_bbox[0] + t * eye_w
            y = cy - eye_h // 2 - int(eye_h * 0.2 * math.sin(t * math.pi))
            lid_pts_upper.append((x, y))
        lid_pts_upper.extend([(sclera_bbox[2], 0), (sclera_bbox[0], 0)])
        draw.polygon(lid_pts_upper, fill=skin_tone)

        # Lower eyelid: same idea, closed along the bottom edge of the image.
        lid_pts_lower = []
        for i in range(20):
            t = i / 19.0
            x = sclera_bbox[0] + t * eye_w
            y = cy + eye_h // 2 + int(eye_h * 0.15 * math.sin(t * math.pi))
            lid_pts_lower.append((x, y))
        lid_pts_lower.extend([(sclera_bbox[2], size), (sclera_bbox[0], size)])
        draw.polygon(lid_pts_lower, fill=skin_tone)

        # Eyelashes (thin lines) rooted on the upper lid arc, every 4 px.
        # The x jitter is drawn before the length so the RNG stream order
        # stays fixed for a given seed.
        for i in range(0, eye_w, 4):
            x = sclera_bbox[0] + i
            t = i / eye_w
            y_base = cy - eye_h // 2 - int(eye_h * 0.2 * math.sin(t * math.pi))
            tip_x = x + self.rng.randint(-2, 3)
            tip_y = y_base - self.rng.randint(2, 6)
            draw.line([(x, y_base), (tip_x, tip_y)], fill=(20, 15, 10), width=1)

        # Add slight blur for realism
        img = img.filter(ImageFilter.GaussianBlur(radius=0.5))

        return np.array(img, dtype=np.float32)

    def _draw_face(self, skin_tone):
        """
        Draw a simplified face crop (head pose context).

        Args:
            skin_tone: RGB tuple used for the background and the face oval.

        Returns:
            float32 array of shape (img_size, img_size, 3) with values in
            the 0-255 range.
        """
        size = self.img_size
        img = Image.new('RGB', (size, size), skin_tone)
        draw = ImageDraw.Draw(img)

        cx, cy = size // 2, size // 2

        # Face oval
        face_w, face_h = int(size * 0.8), int(size * 0.9)
        draw.ellipse([cx - face_w // 2, cy - face_h // 2,
                      cx + face_w // 2, cy + face_h // 2],
                     fill=skin_tone)

        # Eyebrow regions (darker shade of the skin tone, floored at 0)
        darker = tuple(max(0, c - 40) for c in skin_tone)
        draw.arc([cx - face_w // 3, cy - face_h // 4,
                  cx - face_w // 10, cy - face_h // 6],
                 180, 360, fill=darker, width=2)
        draw.arc([cx + face_w // 10, cy - face_h // 4,
                  cx + face_w // 3, cy - face_h // 6],
                 180, 360, fill=darker, width=2)

        # Nose hint
        draw.line([(cx, cy - face_h // 8), (cx, cy + face_h // 8)],
                  fill=darker, width=1)

        # Mouth
        draw.arc([cx - face_w // 6, cy + face_h // 6,
                  cx + face_w // 6, cy + face_h // 4],
                 0, 180, fill=(180, 80, 80), width=2)

        img = img.filter(ImageFilter.GaussianBlur(radius=1))
        return np.array(img, dtype=np.float32)

    def _add_glasses(self, eye_img, glasses_color):
        """
        Overlay a glasses frame on an eye image.

        Args:
            eye_img: Eye crop with values in 0-255; it is cast to uint8
                before drawing.
            glasses_color: RGB tuple for the frame.

        Returns:
            New float32 array with the frame (and, half of the time, a
            faint lens tint) drawn on top.
        """
        img = Image.fromarray(eye_img.astype(np.uint8))
        draw = ImageDraw.Draw(img)
        size = self.img_size
        cx, cy = size // 2, size // 2

        # Frame outline (circular lens)
        r = int(size * 0.35)
        frame_width = self.rng.randint(2, 5)
        draw.ellipse([cx - r, cy - r, cx + r, cy + r],
                     outline=glasses_color, width=frame_width)

        # Temple arm hint
        draw.line([(cx + r, cy), (size, cy - 2)], fill=glasses_color, width=frame_width)

        # Lens tint/reflection (subtle)
        if self.rng.random() > 0.5:
            overlay = Image.new('RGBA', (size, size), (0, 0, 0, 0))
            overlay_draw = ImageDraw.Draw(overlay)
            tint_alpha = self.rng.randint(10, 40)
            overlay_draw.ellipse([cx - r + 2, cy - r + 2, cx + r - 2, cy + r - 2],
                                 fill=(200, 200, 255, tint_alpha))
            img = Image.alpha_composite(img.convert('RGBA'), overlay).convert('RGB')

        return np.array(img, dtype=np.float32)

    def _apply_dark_conditions(self, img, darkness_level):
        """
        Simulate dark/low-light conditions with noise.

        Args:
            img: Image array of shape (H, W, 3) with values in 0-255.
                The input is not modified.
            darkness_level: Brightness multiplier; smaller values give a
                darker image and stronger noise.

        Returns:
            New array clipped to [0, 255].
        """
        # Reduce brightness (this creates a new array, so the in-place
        # channel scaling below never touches the caller's image)
        img = img * darkness_level

        # Additive Gaussian noise standing in for shot noise; its standard
        # deviation grows as the image gets darker
        noise_scale = (1.0 - darkness_level) * 15
        noise = self.rng.randn(*img.shape) * noise_scale
        img = img + noise

        # Color temperature shift (warm/cool tint from artificial lighting)
        if self.rng.random() > 0.5:
            # Warm (yellowish - indoor lights)
            img[:, :, 0] *= 1.1
            img[:, :, 2] *= 0.85
        else:
            # Cool (bluish - screen light)
            img[:, :, 0] *= 0.85
            img[:, :, 2] *= 1.1

        return np.clip(img, 0, 255)

    def _apply_illumination_perturbation(self, img):
        """
        Apply a directional light gradient (from AGE framework).

        Args:
            img: Image array with values in 0-255. The gradient is built
                as a square of side ``img.shape[0]``, so the image is
                expected to be square with 3 channels.

        Returns:
            New array with a randomly oriented, randomly colored additive
            gradient, clipped to [0, 255].
        """
        size = img.shape[0]

        # Random gradient direction
        angle = self.rng.random() * 2 * math.pi

        # Create gradient, normalized to [0, 1] (epsilon guards a flat gradient)
        y_coords, x_coords = np.mgrid[0:size, 0:size].astype(np.float32) / size
        gradient = (x_coords * math.cos(angle) + y_coords * math.sin(angle))
        gradient = (gradient - gradient.min()) / (gradient.max() - gradient.min() + 1e-8)

        # Random intensity and per-channel color weights
        intensity = self.rng.uniform(0.1, 0.5)
        color = self.rng.uniform(0.5, 1.5, size=3)

        gradient_rgb = np.stack([gradient * color[i] for i in range(3)], axis=-1)

        img = img + gradient_rgb * 255 * intensity
        return np.clip(img, 0, 255)

    def _apply_sensor_noise(self, img):
        """
        Simulate CMOS sensor noise (from AGE framework).

        Args:
            img: Image array of shape (H, W, C) with values in 0-255.
                The input is not modified.

        Returns:
            New array with read, shot and fixed-pattern noise added,
            clipped to [0, 255].
        """
        # Gaussian read noise
        read_noise = self.rng.randn(*img.shape) * self.rng.uniform(2, 8)
        # Shot noise (signal-dependent: scales with sqrt of pixel intensity)
        shot_noise = (self.rng.randn(*img.shape)
                      * np.sqrt(np.maximum(img, 0) + 1)
                      * self.rng.uniform(0.1, 0.4))
        # Fixed pattern noise: one value per (column, channel), broadcast
        # over all rows
        fpn = self.rng.randn(1, img.shape[1], img.shape[2]) * self.rng.uniform(1, 3)

        img = img + read_noise + shot_noise + fpn
        return np.clip(img, 0, 255)

    def generate_sample(self, with_glasses_prob=0.25, dark_prob=0.3,
                        lazy_eye_prob=0.15, noise_prob=0.5):
        """
        Generate a single training sample.

        Args:
            with_glasses_prob: Probability of overlaying glasses on both eyes.
            dark_prob: Probability of applying low-light conditions to the
                eyes and the face.
            lazy_eye_prob: Probability that one eye deviates from the gaze
                target (strabismus simulation).
            noise_prob: Probability of adding sensor noise to both eyes.

        Returns:
            Dict with float32 images ``left_eye``, ``right_eye`` and
            ``face`` scaled to [0, 1], and float32 scalars ``gaze_x`` and
            ``gaze_y``.
        """
        # Random gaze target on screen
        gaze_x = self.rng.uniform(0.05, 0.95)
        gaze_y = self.rng.uniform(0.05, 0.95)

        # Random appearance
        skin_tone = self.skin_tones[self.rng.randint(len(self.skin_tones))]
        eye_color = self.eye_colors[self.rng.randint(len(self.eye_colors))]
        eye_openness = self.rng.uniform(0.6, 1.0)

        # Lazy eye simulation: one eye deviates from the target
        lazy_offset_x_L, lazy_offset_y_L = 0.0, 0.0
        lazy_offset_x_R, lazy_offset_y_R = 0.0, 0.0

        if self.rng.random() < lazy_eye_prob:
            # Strabismus: one eye deviates
            affected_eye = self.rng.choice(['left', 'right'])
            deviation_x = self.rng.uniform(-0.4, 0.4)
            deviation_y = self.rng.uniform(-0.15, 0.15)
            if affected_eye == 'left':
                lazy_offset_x_L = deviation_x
                lazy_offset_y_L = deviation_y
            else:
                lazy_offset_x_R = deviation_x
                lazy_offset_y_R = deviation_y

        # Draw eyes
        left_eye = self._draw_eye(gaze_x, gaze_y, skin_tone, eye_color, eye_openness,
                                  lazy_offset_x_L, lazy_offset_y_L)
        right_eye = self._draw_eye(gaze_x, gaze_y, skin_tone, eye_color, eye_openness,
                                   lazy_offset_x_R, lazy_offset_y_R)
        face = self._draw_face(skin_tone)

        # Apply glasses (same frame color on both eyes)
        if self.rng.random() < with_glasses_prob:
            glasses_color = self.glasses_colors[self.rng.randint(len(self.glasses_colors))]
            left_eye = self._add_glasses(left_eye, glasses_color)
            right_eye = self._add_glasses(right_eye, glasses_color)

        # Apply dark conditions (same darkness level for all three crops)
        if self.rng.random() < dark_prob:
            darkness = self.rng.uniform(0.15, 0.5)
            left_eye = self._apply_dark_conditions(left_eye, darkness)
            right_eye = self._apply_dark_conditions(right_eye, darkness)
            face = self._apply_dark_conditions(face, darkness)

        # Illumination perturbation (eyes only)
        if self.rng.random() > 0.5:
            left_eye = self._apply_illumination_perturbation(left_eye)
            right_eye = self._apply_illumination_perturbation(right_eye)

        # Sensor noise (eyes only)
        if self.rng.random() < noise_prob:
            left_eye = self._apply_sensor_noise(left_eye)
            right_eye = self._apply_sensor_noise(right_eye)

        # Normalize to [0, 1]
        left_eye = left_eye / 255.0
        right_eye = right_eye / 255.0
        face = face / 255.0

        return {
            'left_eye': left_eye.astype(np.float32),
            'right_eye': right_eye.astype(np.float32),
            'face': face.astype(np.float32),
            'gaze_x': np.float32(gaze_x),
            'gaze_y': np.float32(gaze_y),
        }

    def generate_dataset(self, num_samples, with_glasses_prob=0.25, dark_prob=0.3,
                         lazy_eye_prob=0.15, noise_prob=0.5):
        """
        Generate a full dataset.

        Args:
            num_samples: Number of samples to generate; values below zero
                are treated as zero.
            with_glasses_prob, dark_prob, lazy_eye_prob, noise_prob:
                Forwarded unchanged to ``generate_sample`` for every sample.

        Returns:
            Dict of float32 arrays: ``left_eye``, ``right_eye`` and
            ``face`` of shape (num_samples, img_size, img_size, 3), and
            ``gaze`` of shape (num_samples, 2) holding (gaze_x, gaze_y).
            The shapes are the same when ``num_samples`` is 0.
        """
        count = max(num_samples, 0)
        image_shape = (count, self.img_size, self.img_size, 3)

        # Preallocate the outputs: this keeps the result well-shaped for an
        # empty dataset and avoids building Python lists that must be
        # copied into arrays afterwards.
        left_eyes = np.empty(image_shape, dtype=np.float32)
        right_eyes = np.empty(image_shape, dtype=np.float32)
        faces = np.empty(image_shape, dtype=np.float32)
        gaze = np.empty((count, 2), dtype=np.float32)

        for i in range(count):
            sample = self.generate_sample(
                with_glasses_prob=with_glasses_prob,
                dark_prob=dark_prob,
                lazy_eye_prob=lazy_eye_prob,
                noise_prob=noise_prob,
            )
            left_eyes[i] = sample['left_eye']
            right_eyes[i] = sample['right_eye']
            faces[i] = sample['face']
            gaze[i, 0] = sample['gaze_x']
            gaze[i, 1] = sample['gaze_y']

            if (i + 1) % 1000 == 0:
                print(f"Generated {i+1}/{num_samples} samples")

        return {
            'left_eye': left_eyes,
            'right_eye': right_eyes,
            'face': faces,
            'gaze': gaze,
        }
358
+
359
+
360
def create_tf_dataset(data_dict, batch_size=64, shuffle=True):
    """
    Wrap the generated numpy arrays in a batched ``tf.data.Dataset``.

    Args:
        data_dict: Mapping with 'left_eye', 'right_eye', 'face' and 'gaze'
            entries.
        batch_size: Number of samples per batch.
        shuffle: When true, shuffle with a buffer of at most 10000 samples.

    Returns:
        Dataset yielding ``(inputs, labels)`` batches, where ``inputs`` is a
        dict keyed by 'left_eye', 'right_eye' and 'face', and ``labels`` is
        the matching slice of ``data_dict['gaze']``.
    """
    inputs = {name: data_dict[name] for name in ('left_eye', 'right_eye', 'face')}
    labels = data_dict['gaze']
    pipeline = tf.data.Dataset.from_tensor_slices((inputs, labels))

    if shuffle:
        # The buffer never needs to be larger than the dataset itself.
        buffer_size = min(len(labels), 10000)
        pipeline = pipeline.shuffle(buffer_size=buffer_size)

    return pipeline.batch(batch_size).prefetch(tf.data.AUTOTUNE)
376
+
377
+
378
def create_single_eye_dataset(data_dict, batch_size=64, shuffle=True):
    """
    Build a ``tf.data.Dataset`` for a model that takes one eye crop at a time.

    Left and right eye crops are stacked along the sample axis (all left
    eyes first, then all right eyes), and the gaze labels are repeated to
    match, so each eye becomes its own training sample and the dataset is
    twice as long as the input. The 'face' entry is not used.

    Args:
        data_dict: Mapping with 'left_eye', 'right_eye' and 'gaze' entries.
        batch_size: Number of samples per batch.
        shuffle: When true, shuffle with a buffer of at most 10000 samples.

    Returns:
        Dataset yielding ``(eye_image, gaze)`` batches.
    """
    eye_crops = [data_dict['left_eye'], data_dict['right_eye']]
    labels = data_dict['gaze']

    # Every eye keeps the gaze label of the sample it came from.
    stacked_eyes = np.concatenate(eye_crops, axis=0)
    stacked_gaze = np.concatenate([labels, labels], axis=0)

    pipeline = tf.data.Dataset.from_tensor_slices((stacked_eyes, stacked_gaze))

    if shuffle:
        buffer_size = min(len(stacked_gaze), 10000)
        pipeline = pipeline.shuffle(buffer_size=buffer_size)

    return pipeline.batch(batch_size).prefetch(tf.data.AUTOTUNE)
399
+
400
+
401
if __name__ == '__main__':
    # Smoke test: one sample, a small dataset, then one tf.data batch.
    print("Testing synthetic data generator...")
    generator = SyntheticGazeDataGenerator(seed=42)

    # Single sample
    one = generator.generate_sample()
    print(f"Sample keys: {list(one.keys())}")
    print(f"Left eye shape: {one['left_eye'].shape}")
    print(f"Gaze: ({one['gaze_x']:.3f}, {one['gaze_y']:.3f})")

    # Small dataset
    arrays = generator.generate_dataset(100)
    print("\nDataset shapes:")
    for name, array in arrays.items():
        print(f" {name}: {array.shape}")

    # tf.data pipeline: inspect the first batch only
    batches = create_tf_dataset(arrays, batch_size=16)
    for features, targets in batches.take(1):
        print("\nBatch shapes:")
        for name, tensor in features.items():
            print(f" {name}: {tensor.shape}")
        print(f" labels: {targets.shape}")

    print("\nDone!")