| """ | |
| Agent 2: Token Normalizer & Structurer | |
| Design System Extractor v2 | |
| Persona: Design System Librarian | |
| Responsibilities: | |
| - Clean noisy extraction data | |
| - Deduplicate similar tokens (colors within threshold, similar spacing) | |
| - Infer naming patterns from class names and contexts | |
| - Tag tokens as: detected | inferred | low-confidence | |
| - Group colors by role (primary, secondary, neutral, etc.) | |
| """ | |
from typing import Optional

from core.token_schema import (
    ColorToken,
    TypographyToken,
    SpacingToken,
    ExtractedTokens,
    NormalizedTokens,
    Confidence,
)
from core.color_utils import (
    parse_color,
    normalize_hex,
    categorize_color,
)

class TokenNormalizer:
    """
    Normalizes and structures extracted tokens.

    This is Agent 2's job — taking raw extraction data and
    organizing it into a clean, deduplicated structure.
    """

    def __init__(self):
        # Thresholds for duplicate detection
        self.color_similarity_threshold = 10  # Euclidean delta in RGB space
        self.spacing_merge_threshold = 2  # px difference to merge
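        # Note: both thresholds are pragmatic heuristics, not calibrated
        # constants. Straight-line RGB distance only approximates perceptual
        # similarity; a perceptual metric (e.g. CIEDE2000 in Lab space) would
        # merge more accurately if finer control is ever needed.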

        # Naming patterns
        self.color_role_keywords = {
            "primary": ["primary", "brand", "main", "accent"],
            "secondary": ["secondary", "alt", "alternate"],
            "success": ["success", "green", "positive", "valid"],
            "warning": ["warning", "yellow", "caution", "alert"],
            "error": ["error", "red", "danger", "invalid", "negative"],
            "info": ["info", "blue", "notice"],
            "neutral": ["gray", "grey", "neutral", "muted", "subtle"],
            "background": ["bg", "background", "surface"],
            "text": ["text", "foreground", "content", "body"],
            "border": ["border", "divider", "separator", "line"],
        }

    def normalize(self, extracted: ExtractedTokens) -> NormalizedTokens:
        """
        Normalize extracted tokens.

        Args:
            extracted: Raw extraction results from Agent 1

        Returns:
            NormalizedTokens with cleaned, deduplicated data
        """
        # Process each token type (returns lists)
        colors_list = self._normalize_colors(extracted.colors)
        typography_list = self._normalize_typography(extracted.typography)
        spacing_list = self._normalize_spacing(extracted.spacing)

        # Convert to dicts keyed by suggested_name
        colors_dict = {}
        for c in colors_list:
            key = c.suggested_name or c.value
            colors_dict[key] = c

        typography_dict = {}
        for t in typography_list:
            key = t.suggested_name or f"{t.font_family}-{t.font_size}"
            typography_dict[key] = t

        spacing_dict = {}
        for s in spacing_list:
            key = s.suggested_name or s.value
            spacing_dict[key] = s

        # Convert radius and shadows to dicts
        radius_dict = {}
        for r in extracted.radius:
            key = f"radius-{r.value}"
            radius_dict[key] = r
        shadows_dict = {}
        # Key shadows by position rather than hash(): Python's built-in
        # hash() is salted per process, so hash-derived names would change
        # from run to run.
        for idx, s in enumerate(extracted.shadows, 1):
            key = f"shadow-{idx}"
            shadows_dict[key] = s

        # Create normalized result
        normalized = NormalizedTokens(
            viewport=extracted.viewport,
            source_url=extracted.source_url,
            colors=colors_dict,
            typography=typography_dict,
            spacing=spacing_dict,
            radius=radius_dict,
            shadows=shadows_dict,
            font_families=extracted.font_families,
            detected_spacing_base=extracted.spacing_base,
            detected_naming_convention=extracted.naming_convention,
        )
        return normalized

    def _normalize_colors(self, colors: list[ColorToken]) -> list[ColorToken]:
        """
        Normalize color tokens:
        - Deduplicate similar colors
        - Infer color roles
        - Assign suggested names
        - Calculate confidence
        """
        if not colors:
            return []

        # Step 1: Deduplicate by exact hex value
        unique_colors = {}
        for color in colors:
            hex_val = normalize_hex(color.value)
            if hex_val in unique_colors:
                # Merge frequency and contexts
                existing = unique_colors[hex_val]
                existing.frequency += color.frequency
                existing.contexts = list(set(existing.contexts + color.contexts))
                existing.elements = list(set(existing.elements + color.elements))
                existing.css_properties = list(set(existing.css_properties + color.css_properties))
            else:
                color.value = hex_val
                unique_colors[hex_val] = color

        # Step 2: Merge visually similar colors
        merged_colors = self._merge_similar_colors(list(unique_colors.values()))

        # Step 3: Infer roles and names
        for color in merged_colors:
            role = self._infer_color_role(color)
            if role:
                color.suggested_name = self._generate_color_name(color, role)
            else:
                color.suggested_name = self._generate_color_name_from_value(color)

            # Update confidence based on frequency
            color.confidence = self._calculate_confidence(color.frequency)

        # Sort by frequency (most used first)
        merged_colors.sort(key=lambda c: -c.frequency)
        return merged_colors

    def _merge_similar_colors(self, colors: list[ColorToken]) -> list[ColorToken]:
        """Merge colors that are visually very similar."""
        if len(colors) <= 1:
            return colors

        merged = []
        used = set()

        for i, color1 in enumerate(colors):
            if i in used:
                continue

            # Find similar colors
            similar_group = [color1]
            for j, color2 in enumerate(colors[i+1:], i+1):
                if j in used:
                    continue
                if self._colors_are_similar(color1.value, color2.value):
                    similar_group.append(color2)
                    used.add(j)

            # Merge the group - keep the most frequent
            similar_group.sort(key=lambda c: -c.frequency)
            primary = similar_group[0]
            # Aggregate data from similar colors
            for other in similar_group[1:]:
                primary.frequency += other.frequency
                primary.contexts = list(set(primary.contexts + other.contexts))
                primary.elements = list(set(primary.elements + other.elements))
                # Merge css_properties too, matching the exact-dedup step above
                primary.css_properties = list(set(primary.css_properties + other.css_properties))

            merged.append(primary)
            used.add(i)

        return merged

    def _colors_are_similar(self, hex1: str, hex2: str) -> bool:
        """Check if two colors are visually similar."""
        try:
            parsed1 = parse_color(hex1)
            parsed2 = parse_color(hex2)
            if parsed1 is None or parsed2 is None:
                return False
            if parsed1.rgb is None or parsed2.rgb is None:
                return False

            rgb1 = parsed1.rgb
            rgb2 = parsed2.rgb

            # Calculate Euclidean distance in RGB space
            distance = sum((a - b) ** 2 for a, b in zip(rgb1, rgb2)) ** 0.5
            return distance < self.color_similarity_threshold
        except Exception:
            return False
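    # Worked example of the distance check above, assuming parse_color
    # yields plain (r, g, b) tuples:
    #   #ff0000 vs #fa0505 -> sqrt(5^2 + 5^2 + 5^2)   ≈ 8.66 < 10 -> merged
    #   #ff0000 vs #f00a0a -> sqrt(15^2 + 10^2 + 10^2) ≈ 20.6 >= 10 -> kept apart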

    def _infer_color_role(self, color: ColorToken) -> Optional[str]:
        """Infer the semantic role of a color from its contexts."""
        all_context = " ".join(color.contexts + color.elements).lower()

        for role, keywords in self.color_role_keywords.items():
            for keyword in keywords:
                if keyword in all_context:
                    return role

        # Try to infer from color category
        category = categorize_color(color.value)
        if category in ["gray", "white", "black"]:
            return "neutral"

        return None
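    # Example: a color whose contexts include "btn-primary" matches the
    # "primary" keyword. A mid-gray with no keyword hits falls through to
    # categorize_color, which (assuming it buckets grays as "gray") maps it
    # to the "neutral" role.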

    def _generate_color_name(self, color: ColorToken, role: str) -> str:
        """Generate a semantic name for a color."""
        # Determine shade level based on luminance
        parsed = parse_color(color.value)
        if parsed and parsed.rgb:
            rgb = parsed.rgb
            luminance = (0.299 * rgb[0] + 0.587 * rgb[1] + 0.114 * rgb[2]) / 255
            if luminance > 0.8:
                shade = "50"
            elif luminance > 0.6:
                shade = "200"
            elif luminance > 0.4:
                shade = "500"
            elif luminance > 0.2:
                shade = "700"
            else:
                shade = "900"
        else:
            shade = "500"

        return f"color.{role}.{shade}"

    def _generate_color_name_from_value(self, color: ColorToken) -> str:
        """Generate a name based on the color value itself."""
        category = categorize_color(color.value)
        parsed = parse_color(color.value)

        if parsed and parsed.rgb:
            rgb = parsed.rgb
            luminance = (0.299 * rgb[0] + 0.587 * rgb[1] + 0.114 * rgb[2]) / 255
            if luminance > 0.6:
                shade = "light"
            elif luminance > 0.3:
                shade = "base"
            else:
                shade = "dark"
        else:
            shade = "base"

        return f"color.{category}.{shade}"

    def _normalize_typography(self, typography: list[TypographyToken]) -> list[TypographyToken]:
        """
        Normalize typography tokens:
        - Deduplicate identical styles
        - Infer type scale categories
        - Assign suggested names
        """
        if not typography:
            return []

        # Deduplicate by unique style combination
        unique_typo = {}
        for typo in typography:
            key = f"{typo.font_family}|{typo.font_size}|{typo.font_weight}|{typo.line_height}"
            if key in unique_typo:
                existing = unique_typo[key]
                existing.frequency += typo.frequency
                existing.elements = list(set(existing.elements + typo.elements))
            else:
                unique_typo[key] = typo

        result = list(unique_typo.values())

        # Infer names based on size and elements
        for typo in result:
            typo.suggested_name = self._generate_typography_name(typo)
            typo.confidence = self._calculate_confidence(typo.frequency)

        # Sort by font size (largest first)
        result.sort(key=lambda t: -self._parse_font_size(t.font_size))
        return result

    def _generate_typography_name(self, typo: TypographyToken) -> str:
        """Generate a semantic name for typography."""
        size_px = self._parse_font_size(typo.font_size)
        elements = " ".join(typo.elements).lower()

        # Determine category from elements
        if any(h in elements for h in ["h1", "hero", "display"]):
            category = "display"
        elif any(h in elements for h in ["h2", "h3", "h4", "h5", "h6", "heading", "title"]):
            category = "heading"
        elif any(h in elements for h in ["label", "caption", "small", "meta"]):
            category = "label"
        elif any(h in elements for h in ["body", "p", "paragraph", "text"]):
            category = "body"
        else:
            category = "text"

        # Determine size tier
        if size_px >= 32:
            size_tier = "xl"
        elif size_px >= 24:
            size_tier = "lg"
        elif size_px >= 18:
            size_tier = "md"
        elif size_px >= 14:
            size_tier = "sm"
        else:
            size_tier = "xs"

        return f"font.{category}.{size_tier}"

    def _parse_font_size(self, size: str) -> float:
        """Parse font size string to pixels."""
        if not size:
            return 16

        size = size.lower().strip()

        # Handle px
        if "px" in size:
            try:
                return float(size.replace("px", ""))
            except ValueError:
                return 16

        # Handle rem (assume 16px base)
        if "rem" in size:
            try:
                return float(size.replace("rem", "")) * 16
            except ValueError:
                return 16

        # Handle em (assume 16px base)
        if "em" in size:
            try:
                return float(size.replace("em", "")) * 16
            except ValueError:
                return 16

        # Try plain number
        try:
            return float(size)
        except ValueError:
            return 16
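    # Examples: "1.5rem" -> 24.0, "14px" -> 14.0, "0.875em" -> 14.0,
    # "18" -> 18.0; anything unparseable falls back to 16.0.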

    def _normalize_spacing(self, spacing: list[SpacingToken]) -> list[SpacingToken]:
        """
        Normalize spacing tokens:
        - Merge similar values
        - Align to base-8 grid if close
        - Assign suggested names
        """
        if not spacing:
            return []

        # Deduplicate by value
        unique_spacing = {}
        for space in spacing:
            key = space.value
            if key in unique_spacing:
                existing = unique_spacing[key]
                existing.frequency += space.frequency
                existing.contexts = list(set(existing.contexts + space.contexts))
            else:
                unique_spacing[key] = space

        result = list(unique_spacing.values())

        # Merge very similar values
        result = self._merge_similar_spacing(result)

        # Assign names
        for space in result:
            space.suggested_name = self._generate_spacing_name(space)
            space.confidence = self._calculate_confidence(space.frequency)

        # Sort by value
        result.sort(key=lambda s: s.value_px)
        return result

    def _merge_similar_spacing(self, spacing: list[SpacingToken]) -> list[SpacingToken]:
        """Merge spacing values that are very close."""
        if len(spacing) <= 1:
            return spacing

        # Sort by pixel value
        spacing.sort(key=lambda s: s.value_px)

        merged = []
        i = 0
        while i < len(spacing):
            current = spacing[i]
            group = [current]

            # Find adjacent similar values
            j = i + 1
            while j < len(spacing):
                if abs(spacing[j].value_px - current.value_px) <= self.spacing_merge_threshold:
                    group.append(spacing[j])
                    j += 1
                else:
                    break

            # Merge group - prefer base-8 aligned value or most frequent
            group.sort(key=lambda s: (-s.fits_base_8, -s.frequency))
            primary = group[0]
            for other in group[1:]:
                primary.frequency += other.frequency
                primary.contexts = list(set(primary.contexts + other.contexts))

            merged.append(primary)
            i = j

        return merged
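    # Example: with the default 2px threshold, 7px / 8px / 9px collapse into
    # a single token, and the 8px entry wins the sort because it fits the
    # base-8 grid.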

    def _generate_spacing_name(self, space: SpacingToken) -> str:
        """Generate a semantic name for spacing."""
        px = space.value_px

        # Map to a base-8 numeric scale (step name is roughly px / 8)
        if px <= 2:
            size = "px"
        elif px <= 4:
            size = "0.5"
        elif px <= 8:
            size = "1"
        elif px <= 12:
            size = "1.5"
        elif px <= 16:
            size = "2"
        elif px <= 20:
            size = "2.5"
        elif px <= 24:
            size = "3"
        elif px <= 32:
            size = "4"
        elif px <= 40:
            size = "5"
        elif px <= 48:
            size = "6"
        elif px <= 64:
            size = "8"
        elif px <= 80:
            size = "10"
        elif px <= 96:
            size = "12"
        else:
            # Continue the px / 8 progression for larger values
            size = str(int(px / 8))

        return f"space.{size}"

    def _calculate_confidence(self, frequency: int) -> Confidence:
        """Calculate confidence based on frequency."""
        if frequency >= 10:
            return Confidence.HIGH
        elif frequency >= 3:
            return Confidence.MEDIUM
        else:
            return Confidence.LOW

def normalize_tokens(extracted: ExtractedTokens) -> NormalizedTokens:
    """Convenience function to normalize tokens."""
    normalizer = TokenNormalizer()
    return normalizer.normalize(extracted)
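

# Minimal smoke-test sketch, not part of the pipeline. It assumes the
# core.token_schema types are keyword-constructible (dataclasses or pydantic
# models) with exactly the fields this module reads; adjust the constructor
# calls to the real schema before running.
if __name__ == "__main__":
    extracted = ExtractedTokens(
        viewport="1440x900",
        source_url="https://example.com",
        colors=[
            ColorToken(value="#1a73e8", frequency=12, contexts=["btn-primary"],
                       elements=["button"], css_properties=["background-color"]),
            ColorToken(value="#1b74e9", frequency=2, contexts=["link"],
                       elements=["a"], css_properties=["color"]),
        ],
        typography=[],
        spacing=[],
        radius=[],
        shadows=[],
        font_families=["Inter"],
        spacing_base=8,
        naming_convention="kebab-case",
    )
    result = normalize_tokens(extracted)
    # The two near-identical blues (RGB distance ≈ 1.7) should merge into a
    # single "primary" token with combined frequency 14.
    for name, token in result.colors.items():
        print(name, token.value, token.frequency, token.confidence)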