File size: 3,402 Bytes
d520909
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""
Reading Order Base Interface

Defines interfaces for reading order reconstruction.
"""

from abc import ABC, abstractmethod
from typing import List, Optional, Dict, Any, Tuple
from dataclasses import dataclass, field
from pydantic import BaseModel, Field

from ..schemas.core import BoundingBox, LayoutRegion, OCRRegion


class ReadingOrderConfig(BaseModel):
    """Configuration for reading order reconstruction."""

    # --- Method selection -------------------------------------------------
    method: str = Field(
        "rule_based",
        description="Method: rule_based or model_based",
    )

    # --- Column detection -------------------------------------------------
    detect_columns: bool = Field(
        True,
        description="Attempt to detect multi-column layouts",
    )
    # Constrained to be at least 1.
    max_columns: int = Field(
        4,
        description="Maximum number of columns to detect",
        ge=1,
    )
    # Constrained to the closed interval [0.0, 1.0].
    column_gap_threshold: float = Field(
        0.1,
        description="Minimum gap ratio between columns",
        ge=0.0,
        le=1.0,
    )

    # --- Reading direction ------------------------------------------------
    reading_direction: str = Field(
        "ltr",
        description="Reading direction: ltr (left-to-right) or rtl",
    )
    vertical_priority: bool = Field(
        True,
        description="Prioritize top-to-bottom over left-to-right",
    )

    # --- Element handling -------------------------------------------------
    respect_layout_types: bool = Field(
        True,
        description="Respect layout region boundaries",
    )
    header_footer_separate: bool = Field(
        True,
        description="Keep headers/footers at start/end",
    )


@dataclass
class ReadingOrderResult:
    """Result of reading order reconstruction."""

    # Region indices, listed in reading order.
    order: List[int] = field(default_factory=list)

    # Ordered regions (if provided)
    ordered_regions: List[Any] = field(default_factory=list)

    # Column information.
    num_columns: int = 1
    # NOTE(review): presumably maps region index -> column index; confirm
    # against the concrete reconstructors that populate it.
    column_assignments: Dict[int, int] = field(default_factory=dict)

    # Processing info
    processing_time_ms: float = 0.0
    success: bool = True
    error: Optional[str] = None

    def get_ordered_text(self, regions: List[OCRRegion]) -> str:
        """Return the text of ``regions`` joined in reading order.

        Args:
            regions: Objects exposing a ``text`` attribute; every entry of
                ``self.order`` is interpreted as an index into this list.

        Returns:
            The selected texts joined by single spaces. Indices that fall
            outside ``regions`` (negative or too large) are skipped, and an
            empty string is returned when nothing is selected.
        """
        count = len(regions)
        # Require 0 <= i as well as i < count: a negative index would
        # otherwise wrap around and silently pull text from the end of the
        # list instead of being treated as out of range.
        return " ".join(
            regions[i].text for i in self.order if 0 <= i < count
        )


class ReadingOrderReconstructor(ABC):
    """Interface that concrete reading order reconstructors implement."""

    def __init__(self, config: Optional[ReadingOrderConfig] = None):
        # A falsy ``config`` (such as None) is replaced by a default one.
        if not config:
            config = ReadingOrderConfig()
        self.config = config
        self._initialized = False

    @property
    def is_initialized(self) -> bool:
        """Current value of the ``_initialized`` flag (False after construction)."""
        return self._initialized

    @abstractmethod
    def initialize(self):
        """Initialize the reconstructor."""

    @abstractmethod
    def reconstruct(
        self,
        regions: List[Any],
        layout_regions: Optional[List[LayoutRegion]] = None,
        page_width: Optional[int] = None,
        page_height: Optional[int] = None,
    ) -> ReadingOrderResult:
        """
        Work out the reading order of the given regions.

        Args:
            regions: OCR regions or layout regions
            layout_regions: Optional layout regions for context
            page_width: Page width in pixels
            page_height: Page height in pixels

        Returns:
            ReadingOrderResult with ordered indices
        """