"""
Scroll Element Dataset Loader for Drizz Mobile App Testing
Loads screenshots with bounding boxes and commands to identify scroll elements.
Converts to GEPA-compatible format for prompt optimization.
"""

import base64
import logging
import random
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)


class ScrollDatasetLoader:
    """
    GENERIC dataset loader for image-based tasks.

    This is a LIBRARY class - NO hardcoded assumptions about:
    - What the task is (OCR, element detection, classification, etc.)
    - Input format (questions, commands, descriptions, etc.)
    - Output format (IDs, text, JSON, etc.)

    Users define their dataset in the test script and pass it here.
    Dataset format per item: (image_filename, input_text, expected_output)

    Example usage (ANY task):
        # Define YOUR dataset in YOUR test script
        my_dataset = [
            ("img1.png", "What is the main color?", "blue"),
            ("img2.png", "Count the objects", "5"),
            ("img3.png", "Describe the scene", "A cat on a sofa"),
        ]
        loader = ScrollDatasetLoader(images_dir="images", dataset_config=my_dataset)
        data = loader.load_dataset()
    """

    # Inclusive range of plausible UI element IDs; numbers outside this
    # range are rejected by _extract_element_id.
    _MIN_ELEMENT_ID = 1
    _MAX_ELEMENT_ID = 100

    def __init__(
        self,
        images_dir: str = "images",
        dataset_config: Optional[List[Tuple[str, str, str]]] = None
    ):
        """
        Initialize dataset loader.

        Args:
            images_dir: Directory containing images.
            dataset_config: List of (image_filename, input_text, expected_output)
                tuples. REQUIRED - no hardcoded defaults to keep library generic.

        Raises:
            FileNotFoundError: If images_dir doesn't exist.
            ValueError: If dataset_config is None.
        """
        self.images_dir = Path(images_dir)
        if not self.images_dir.exists():
            raise FileNotFoundError(f"Images directory not found: {images_dir}")
        if dataset_config is None:
            raise ValueError(
                "dataset_config is required. This is a library class - define your "
                "dataset in the test script:\n"
                "    dataset = [('img1.png', 'your input', 'expected output'), ...]\n"
                "    loader = ScrollDatasetLoader(images_dir='...', dataset_config=dataset)"
            )
        self.dataset_config = dataset_config

    def load_dataset(self) -> List[Dict[str, Any]]:
        """
        Load the configured dataset, base64-encoding each image.

        Items whose image is missing or unreadable are skipped with a warning
        rather than aborting the whole load (best-effort semantics).

        Returns:
            List of dataset items in GEPA format:
            {
                "input": <input text, any format>,
                "output": <expected output text, any format>,
                "image_base64": <base64 image>,   # TOP LEVEL for UniversalConverter
                "metadata": {
                    "image_path": str,
                    "input_text": str,
                    "expected_output": str,
                    "image_filename": str,
                    "element_id": int | None,     # extracted from expected_output
                }
            }

        Raises:
            ValueError: If no configured image could be loaded.
        """
        dataset: List[Dict[str, Any]] = []
        for image_filename, input_text, expected_output in self.dataset_config:
            image_path = self.images_dir / image_filename
            if not image_path.exists():
                logger.warning("Image not found: %s", image_path)
                continue
            try:
                image_base64 = self._encode_image(image_path)
            except OSError as e:
                # Narrow to OSError: that is what a failed file read raises.
                logger.warning("Error encoding %s: %s", image_filename, e)
                continue
            # Phase 1: extract element_id from expected_output for robust
            # evaluation; None is stored when extraction fails so consumers
            # can fall back to full-text comparison.
            element_id = self._extract_element_id(expected_output)
            if element_id is None:
                logger.warning(
                    "Could not extract element_id from '%s' in %s",
                    expected_output, image_filename,
                )
            # Item shape is completely generic (image + input + output);
            # image_base64 sits at TOP LEVEL so UniversalConverter finds it.
            dataset.append({
                "input": input_text,
                "output": expected_output,
                "image_base64": image_base64,
                "metadata": {
                    "image_path": str(image_path),
                    "input_text": input_text,
                    "expected_output": expected_output,
                    "image_filename": image_filename,
                    "element_id": element_id,
                },
            })
        if not dataset:
            raise ValueError("No valid images found in dataset")
        logger.info("Loaded %d scroll element detection samples", len(dataset))
        return dataset

    def _extract_element_id(self, expected_output: str) -> Optional[int]:
        """
        Extract a UI element ID from an expected-output string.

        Handles formats such as:
        - "Element: 4"
        - "Element 4"
        - "4" (standalone)
        - "Element: 4, Description: ..." (full reasoning text)

        Args:
            expected_output: Full expected output string (may contain reasoning).

        Returns:
            The element ID as an int when a number in the accepted range
            (1-100) is found, otherwise None.
        """
        import re  # local import keeps the library's top-level deps minimal

        if not expected_output:
            return None

        # Prefer an explicitly labelled ID ("Element: 4" / "element 4"),
        # then fall back to the first standalone 1-3 digit number.
        for pattern in (r'element[:\s]+(\d+)', r'\b(\d{1,3})\b'):
            match = re.search(pattern, expected_output, re.IGNORECASE)
            if match:
                element_id = int(match.group(1))
                if self._MIN_ELEMENT_ID <= element_id <= self._MAX_ELEMENT_ID:
                    return element_id
        return None

    def _encode_image(self, image_path: Path) -> str:
        """
        Encode an image file as a base64 string.

        Args:
            image_path: Path to the image file.

        Returns:
            Base64-encoded file contents as an ASCII-safe str.
        """
        return base64.b64encode(image_path.read_bytes()).decode('utf-8')

    def split_dataset(
        self,
        dataset: List[Dict[str, Any]],
        train_size: int = 4,
        val_size: int = 1,
        test_size: int = 1,
        shuffle: bool = True,
        seed: Optional[int] = None
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
        """
        Split dataset into train, validation, and test sets.

        Shuffling ensures a varied image distribution across splits so hard
        images do not always land in the validation set.

        Args:
            dataset: Complete dataset.
            train_size: Number of samples for training (default: 4).
            val_size: Number of samples for validation (default: 1).
            test_size: Number of samples for test (default: 1).
            shuffle: Whether to shuffle before splitting (default: True).
            seed: Random seed for reproducible shuffling (default: None = random).

        Returns:
            Tuple of (train_set, val_set, test_set).
        """
        n = len(dataset)
        total_size = train_size + val_size + test_size
        if total_size > n:
            logger.warning(
                "Requested split (%d) exceeds dataset size (%d). "
                "Adjusting split proportionally...", total_size, n,
            )
            ratio = n / total_size
            train_size = int(train_size * ratio)
            val_size = int(val_size * ratio)
            # Remainder goes to test so the three sizes always sum to n.
            test_size = n - train_size - val_size

        dataset_copy = dataset.copy()  # don't mutate the caller's list
        if shuffle:
            if seed is not None:
                # Private RNG instance: seeding must not clobber the global
                # `random` module state of the host application.
                random.Random(seed).shuffle(dataset_copy)
                logger.debug("Shuffled dataset with seed=%s for reproducible splits", seed)
            else:
                random.shuffle(dataset_copy)
                logger.debug("Shuffled dataset randomly (no seed)")
        else:
            logger.warning("Not shuffling dataset - using original order")

        train_set = dataset_copy[:train_size]
        val_set = dataset_copy[train_size:train_size + val_size]
        test_set = dataset_copy[train_size + val_size:train_size + val_size + test_size]
        logger.info(
            "Dataset split: %d train, %d val, %d test",
            len(train_set), len(val_set), len(test_set),
        )
        if shuffle:
            # Log (not print) split membership for debugging; this is
            # library code, so stdout belongs to the caller.
            for split_name, subset in (("Train", train_set), ("Val", val_set), ("Test", test_set)):
                files = [item['metadata'].get('image_filename', 'N/A') for item in subset]
                logger.debug("%s images: %s", split_name, files)
        return train_set, val_set, test_set
def load_scroll_dataset(
    images_dir: str = "images",
    dataset_config: Optional[List[Tuple[str, str, str]]] = None,
    split: bool = True
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Convenience function to load an image-based dataset (GENERIC).

    Args:
        images_dir: Directory containing images.
        dataset_config: List of (image_filename, input_text, expected_output)
            tuples. Required by the loader; passing None raises ValueError.
        split: Whether to split into train/val/test.

    Returns:
        If split=True: (train_set, val_set, test_set) using the loader's
        default split sizes.
        If split=False: (full_dataset, [], []).

    Raises:
        FileNotFoundError: If images_dir doesn't exist.
        ValueError: If dataset_config is None or no images could be loaded.

    Example (works for ANY task):
        dataset_config = [
            ("img1.png", "What color is the sky?", "blue"),
            ("img2.png", "Count the dogs", "2"),
        ]
        train, val, test = load_scroll_dataset(
            images_dir="images",
            dataset_config=dataset_config
        )
    """
    loader = ScrollDatasetLoader(images_dir, dataset_config=dataset_config)
    dataset = loader.load_dataset()
    if split:
        return loader.split_dataset(dataset)
    # Unsplit mode keeps the 3-tuple shape so callers can unpack uniformly.
    return dataset, [], []
# Example usage (for testing the library loader itself)
if __name__ == "__main__":
    # Demo banner only: the loader needs a user-supplied dataset_config,
    # so running this module directly just prints usage guidance.
    usage_lines = (
        "🚀 Testing Scroll Dataset Loader...",
        "⚠️ NOTE: This is a library class. Define your dataset in your test script.",
        "\nExample:",
        " dataset_config = [",
        " ('image1.png', 'Scroll down by 50%', '3'),",
        " ('image2.png', 'Swipe left', '4'),",
        " ]",
        " train, val, test = load_scroll_dataset(",
        " images_dir='images',",
        " dataset_config=dataset_config",
        " )",
    )
    print("\n".join(usage_lines))