# Uploaded by MHamdan
# Initial commit: SPARKNET framework (d520909)
"""
Reading Order Base Interface
Defines interfaces for reading order reconstruction.
"""
from abc import ABC, abstractmethod
from typing import List, Optional, Dict, Any, Tuple
from dataclasses import dataclass, field
from pydantic import BaseModel, Field
from ..schemas.core import BoundingBox, LayoutRegion, OCRRegion
class ReadingOrderConfig(BaseModel):
    """Configuration for reading order reconstruction."""

    # --- Strategy selection -------------------------------------------------
    # Free-form string; the description names the two expected values.
    method: str = Field(
        "rule_based", description="Method: rule_based or model_based"
    )

    # --- Multi-column handling ----------------------------------------------
    detect_columns: bool = Field(
        True, description="Attempt to detect multi-column layouts"
    )
    # Must be at least 1 (ge=1).
    max_columns: int = Field(
        4, ge=1, description="Maximum number of columns to detect"
    )
    # Constrained to the closed interval [0.0, 1.0].
    column_gap_threshold: float = Field(
        0.1, ge=0.0, le=1.0, description="Minimum gap ratio between columns"
    )

    # --- Reading direction --------------------------------------------------
    reading_direction: str = Field(
        "ltr", description="Reading direction: ltr (left-to-right) or rtl"
    )
    vertical_priority: bool = Field(
        True, description="Prioritize top-to-bottom over left-to-right"
    )

    # --- Treatment of layout elements ---------------------------------------
    respect_layout_types: bool = Field(
        True, description="Respect layout region boundaries"
    )
    header_footer_separate: bool = Field(
        True, description="Keep headers/footers at start/end"
    )
@dataclass
class ReadingOrderResult:
    """Result of reading order reconstruction."""

    # Indices of the input regions, listed in reading order.
    order: List[int] = field(default_factory=list)

    # The regions themselves in reading order (populated only if provided).
    ordered_regions: List[Any] = field(default_factory=list)

    # Column information: number of columns and an int -> int assignment map.
    num_columns: int = 1
    column_assignments: Dict[int, int] = field(default_factory=dict)

    # Processing info: elapsed time, success flag and optional error message.
    processing_time_ms: float = 0.0
    success: bool = True
    error: Optional[str] = None

    def get_ordered_text(self, regions: List[OCRRegion]) -> str:
        """Return the regions' text joined by single spaces, in reading order.

        Args:
            regions: Regions that ``self.order`` indexes into; each one is
                read through its ``text`` attribute.

        Returns:
            The texts selected by ``self.order`` joined with ``" "``, or an
            empty string when ``self.order`` is empty. Indices that do not
            refer to a position in ``regions`` are skipped.
        """
        if not self.order:
            return ""

        region_count = len(regions)
        # The lower bound matters: a negative index would otherwise pass the
        # range check and silently wrap around to the end of ``regions``.
        return " ".join(
            regions[idx].text
            for idx in self.order
            if 0 <= idx < region_count
        )
class ReadingOrderReconstructor(ABC):
    """Interface that concrete reading order reconstructors implement."""

    def __init__(self, config: Optional[ReadingOrderConfig] = None):
        """Store ``config``, falling back to a default ``ReadingOrderConfig``."""
        if not config:
            config = ReadingOrderConfig()
        self.config = config
        # Starts out False; this base class never sets it to True itself.
        self._initialized = False

    @property
    def is_initialized(self) -> bool:
        """Current value of the internal ``_initialized`` flag."""
        return self._initialized

    @abstractmethod
    def initialize(self):
        """Initialize the reconstructor (must be provided by subclasses)."""
        ...

    @abstractmethod
    def reconstruct(
        self,
        regions: List[Any],
        layout_regions: Optional[List[LayoutRegion]] = None,
        page_width: Optional[int] = None,
        page_height: Optional[int] = None,
    ) -> ReadingOrderResult:
        """
        Work out the reading order of the given regions.

        Args:
            regions: OCR regions or layout regions to be ordered
            layout_regions: Layout regions usable as extra context (optional)
            page_width: Width of the page, in pixels
            page_height: Height of the page, in pixels

        Returns:
            A ReadingOrderResult carrying the ordered indices
        """
        ...