|
|
""" |
|
|
Reading Order Base Interface |
|
|
|
|
|
Defines interfaces for reading order reconstruction. |
|
|
""" |
|
|
|
|
|
from abc import ABC, abstractmethod |
|
|
from typing import List, Optional, Dict, Any, Tuple |
|
|
from dataclasses import dataclass, field |
|
|
from pydantic import BaseModel, Field |
|
|
|
|
|
from ..schemas.core import BoundingBox, LayoutRegion, OCRRegion |
|
|
|
|
|
|
|
|
class ReadingOrderConfig(BaseModel):
    """Settings that control how reading order is reconstructed.

    The fields fall into four groups: the reconstruction method,
    multi-column detection, reading direction, and layout handling.
    Numeric limits are enforced by the ``Field`` constraints declared
    below (``max_columns >= 1`` and ``0.0 <= column_gap_threshold <= 1.0``).
    """

    # --- Reconstruction method ---------------------------------------
    # Typed as a plain string; this class declares no validator that
    # restricts it to the values named in the description.
    method: str = Field(default="rule_based", description="Method: rule_based or model_based")

    # --- Column detection --------------------------------------------
    detect_columns: bool = Field(default=True, description="Attempt to detect multi-column layouts")
    max_columns: int = Field(default=4, ge=1, description="Maximum number of columns to detect")
    column_gap_threshold: float = Field(
        default=0.1, ge=0.0, le=1.0, description="Minimum gap ratio between columns"
    )

    # --- Reading direction -------------------------------------------
    # Also a plain string with no validator declared in this class.
    reading_direction: str = Field(
        default="ltr", description="Reading direction: ltr (left-to-right) or rtl"
    )
    vertical_priority: bool = Field(
        default=True, description="Prioritize top-to-bottom over left-to-right"
    )

    # --- Layout handling ---------------------------------------------
    respect_layout_types: bool = Field(default=True, description="Respect layout region boundaries")
    header_footer_separate: bool = Field(default=True, description="Keep headers/footers at start/end")
|
|
|
|
|
|
|
|
@dataclass
class ReadingOrderResult:
    """Result of reading order reconstruction.

    Attributes:
        order: Region indices listed in reading order; used by
            ``get_ordered_text`` to index into the supplied regions.
        ordered_regions: Presumably the regions themselves arranged in
            reading order (element type is left open as ``Any``).
        num_columns: Column count; defaults to 1.
        column_assignments: Int-to-int mapping; presumably region index
            to column index (verify against the implementations).
        processing_time_ms: Processing time in milliseconds; defaults to 0.0.
        success: Success flag; defaults to True.
        error: Optional error message; defaults to None.
    """

    order: List[int] = field(default_factory=list)

    ordered_regions: List[Any] = field(default_factory=list)

    num_columns: int = 1
    column_assignments: Dict[int, int] = field(default_factory=dict)

    processing_time_ms: float = 0.0
    success: bool = True
    error: Optional[str] = None

    def get_ordered_text(self, regions: List["OCRRegion"]) -> str:
        """Join the text of ``regions`` following ``self.order``.

        Args:
            regions: Sequence indexed by the entries of ``self.order``;
                each selected element must expose a ``text`` attribute.

        Returns:
            The selected texts joined by single spaces, or an empty
            string when ``self.order`` is empty. Entries of ``order``
            that fall outside ``[0, len(regions))`` are skipped.
        """
        if not self.order:
            return ""
        region_count = len(regions)
        # Reject negative indices as well as too-large ones: Python would
        # otherwise wrap a negative index around and silently return a
        # region from the end of the list.
        return " ".join(
            regions[i].text for i in self.order if 0 <= i < region_count
        )
|
|
|
|
|
|
|
|
class ReadingOrderReconstructor(ABC):
    """Interface that concrete reading order reconstructors implement.

    Subclasses must provide ``initialize`` and ``reconstruct``.
    """

    def __init__(self, config: Optional[ReadingOrderConfig] = None):
        """Store the configuration and start in the uninitialized state.

        Args:
            config: Settings to use. When omitted (or falsy), a
                ``ReadingOrderConfig`` with default values is created.
        """
        if not config:
            config = ReadingOrderConfig()
        self.config = config
        self._initialized = False

    @abstractmethod
    def initialize(self):
        """Prepare the reconstructor for use."""

    @abstractmethod
    def reconstruct(
        self,
        regions: List[Any],
        layout_regions: Optional[List[LayoutRegion]] = None,
        page_width: Optional[int] = None,
        page_height: Optional[int] = None,
    ) -> ReadingOrderResult:
        """Determine the reading order of the given regions.

        Args:
            regions: OCR regions or layout regions to be ordered.
            layout_regions: Optional layout regions supplied as context.
            page_width: Width of the page in pixels.
            page_height: Height of the page in pixels.

        Returns:
            A ``ReadingOrderResult`` carrying the ordered indices.
        """

    @property
    def is_initialized(self) -> bool:
        """Return the ``_initialized`` flag, which is False after construction."""
        return self._initialized
|
|
|