""" Reading Order Base Interface Defines interfaces for reading order reconstruction. """ from abc import ABC, abstractmethod from typing import List, Optional, Dict, Any, Tuple from dataclasses import dataclass, field from pydantic import BaseModel, Field from ..schemas.core import BoundingBox, LayoutRegion, OCRRegion class ReadingOrderConfig(BaseModel): """Configuration for reading order reconstruction.""" # Method method: str = Field( default="rule_based", description="Method: rule_based or model_based" ) # Column detection detect_columns: bool = Field( default=True, description="Attempt to detect multi-column layouts" ) max_columns: int = Field( default=4, ge=1, description="Maximum number of columns to detect" ) column_gap_threshold: float = Field( default=0.1, ge=0.0, le=1.0, description="Minimum gap ratio between columns" ) # Reading direction reading_direction: str = Field( default="ltr", description="Reading direction: ltr (left-to-right) or rtl" ) vertical_priority: bool = Field( default=True, description="Prioritize top-to-bottom over left-to-right" ) # Element handling respect_layout_types: bool = Field( default=True, description="Respect layout region boundaries" ) header_footer_separate: bool = Field( default=True, description="Keep headers/footers at start/end" ) @dataclass class ReadingOrderResult: """Result of reading order reconstruction.""" # Ordered indices order: List[int] = field(default_factory=list) # Ordered regions (if provided) ordered_regions: List[Any] = field(default_factory=list) # Column information num_columns: int = 1 column_assignments: Dict[int, int] = field(default_factory=dict) # Processing info processing_time_ms: float = 0.0 success: bool = True error: Optional[str] = None def get_ordered_text(self, regions: List[OCRRegion]) -> str: """Get text in reading order.""" if not self.order: return "" ordered_texts = [regions[i].text for i in self.order if i < len(regions)] return " ".join(ordered_texts) class ReadingOrderReconstructor(ABC): """Abstract base class for reading order reconstruction.""" def __init__(self, config: Optional[ReadingOrderConfig] = None): self.config = config or ReadingOrderConfig() self._initialized = False @abstractmethod def initialize(self): """Initialize the reconstructor.""" pass @abstractmethod def reconstruct( self, regions: List[Any], layout_regions: Optional[List[LayoutRegion]] = None, page_width: Optional[int] = None, page_height: Optional[int] = None, ) -> ReadingOrderResult: """ Reconstruct reading order for regions. Args: regions: OCR regions or layout regions layout_regions: Optional layout regions for context page_width: Page width in pixels page_height: Page height in pixels Returns: ReadingOrderResult with ordered indices """ pass @property def is_initialized(self) -> bool: return self._initialized