File size: 2,571 Bytes
d520909
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
"""
Document Intelligence Model Interfaces

Pluggable model interfaces for document understanding:
- OCRModel: Text recognition
- LayoutModel: Layout detection
- ReadingOrderModel: Reading order determination
- TableModel: Table structure extraction
- ChartModel: Chart/graph understanding
- VisionLanguageModel: Multimodal understanding
"""

from .base import (
    # Base classes
    BaseModel,
    BatchableModel,
    # Configuration
    ModelConfig,
    ModelMetadata,
    ModelCapability,
    # Utilities
    ImageInput,
    normalize_image_input,
    ensure_pil_image,
)

from .ocr import (
    # Config
    OCRConfig,
    OCREngine,
    # Data classes
    OCRWord,
    OCRLine,
    OCRBlock,
    OCRResult,
    # Model interface
    OCRModel,
)

from .layout import (
    # Config
    LayoutConfig,
    # Data classes
    LayoutRegionType,
    LayoutRegion,
    LayoutResult,
    # Model interfaces
    LayoutModel,
    ReadingOrderModel,
    HeuristicReadingOrderModel,
)

from .table import (
    # Config
    TableConfig,
    # Data classes
    TableCellType,
    TableStructure,
    TableExtractionResult,
    # Model interface
    TableModel,
)

from .chart import (
    # Config
    ChartConfig,
    # Data classes
    ChartType,
    AxisInfo,
    LegendItem,
    DataSeries,
    TrendInfo,
    ChartStructure,
    ChartExtractionResult,
    # Model interface
    ChartModel,
)

from .vlm import (
    # Config
    VLMConfig,
    VLMTask,
    # Data classes
    VLMMessage,
    VLMResponse,
    DocumentQAResult,
    FieldExtractionVLMResult,
    # Model interface
    VisionLanguageModel,
)

# Public API of this package: the names exported by a star-import.
# Entries are grouped by the submodule they are imported from above and
# listed in the same order as in the corresponding import statement.
__all__ = [
    # Base (from .base): base classes, configuration, image-input utilities
    "BaseModel",
    "BatchableModel",
    "ModelConfig",
    "ModelMetadata",
    "ModelCapability",
    "ImageInput",
    "normalize_image_input",
    "ensure_pil_image",
    # OCR (from .ocr): config, data classes, model interface
    "OCRConfig",
    "OCREngine",
    "OCRWord",
    "OCRLine",
    "OCRBlock",
    "OCRResult",
    "OCRModel",
    # Layout (from .layout): config, data classes, layout and reading-order
    # model interfaces
    "LayoutConfig",
    "LayoutRegionType",
    "LayoutRegion",
    "LayoutResult",
    "LayoutModel",
    "ReadingOrderModel",
    "HeuristicReadingOrderModel",
    # Table (from .table): config, data classes, model interface
    "TableConfig",
    "TableCellType",
    "TableStructure",
    "TableExtractionResult",
    "TableModel",
    # Chart (from .chart): config, data classes, model interface
    "ChartConfig",
    "ChartType",
    "AxisInfo",
    "LegendItem",
    "DataSeries",
    "TrendInfo",
    "ChartStructure",
    "ChartExtractionResult",
    "ChartModel",
    # VLM (from .vlm): config, data classes, model interface
    "VLMConfig",
    "VLMTask",
    "VLMMessage",
    "VLMResponse",
    "DocumentQAResult",
    "FieldExtractionVLMResult",
    "VisionLanguageModel",
]