File size: 2,571 Bytes
d520909 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
"""
Document Intelligence Model Interfaces
Pluggable model interfaces for document understanding:
- OCRModel: Text recognition
- LayoutModel: Layout detection
- ReadingOrderModel: Reading order determination
- TableModel: Table structure extraction
- ChartModel: Chart/graph understanding
- VisionLanguageModel: Multimodal understanding
"""
from .base import (
# Base classes
BaseModel,
BatchableModel,
# Configuration
ModelConfig,
ModelMetadata,
ModelCapability,
# Utilities
ImageInput,
normalize_image_input,
ensure_pil_image,
)
from .ocr import (
# Config
OCRConfig,
OCREngine,
# Data classes
OCRWord,
OCRLine,
OCRBlock,
OCRResult,
# Model interface
OCRModel,
)
from .layout import (
# Config
LayoutConfig,
# Data classes
LayoutRegionType,
LayoutRegion,
LayoutResult,
# Model interfaces
LayoutModel,
ReadingOrderModel,
HeuristicReadingOrderModel,
)
from .table import (
# Config
TableConfig,
# Data classes
TableCellType,
TableStructure,
TableExtractionResult,
# Model interface
TableModel,
)
from .chart import (
# Config
ChartConfig,
# Data classes
ChartType,
AxisInfo,
LegendItem,
DataSeries,
TrendInfo,
ChartStructure,
ChartExtractionResult,
# Model interface
ChartModel,
)
from .vlm import (
# Config
VLMConfig,
VLMTask,
# Data classes
VLMMessage,
VLMResponse,
DocumentQAResult,
FieldExtractionVLMResult,
# Model interface
VisionLanguageModel,
)
# Public API of this package: the names bound by a wildcard import of the
# package. Every entry is one of the names imported from the sibling
# submodules above, listed in the same order and grouped by source submodule.
# When adding an export, import it above AND list it here so the two stay
# in sync.
__all__ = [
# Base (from .base): base classes, configuration, and image utilities
"BaseModel",
"BatchableModel",
"ModelConfig",
"ModelMetadata",
"ModelCapability",
"ImageInput",
"normalize_image_input",
"ensure_pil_image",
# OCR (from .ocr): config, data classes, and the model interface
"OCRConfig",
"OCREngine",
"OCRWord",
"OCRLine",
"OCRBlock",
"OCRResult",
"OCRModel",
# Layout (from .layout): config, data classes, and the layout /
# reading-order model interfaces
"LayoutConfig",
"LayoutRegionType",
"LayoutRegion",
"LayoutResult",
"LayoutModel",
"ReadingOrderModel",
"HeuristicReadingOrderModel",
# Table (from .table): config, data classes, and the model interface
"TableConfig",
"TableCellType",
"TableStructure",
"TableExtractionResult",
"TableModel",
# Chart (from .chart): config, data classes, and the model interface
"ChartConfig",
"ChartType",
"AxisInfo",
"LegendItem",
"DataSeries",
"TrendInfo",
"ChartStructure",
"ChartExtractionResult",
"ChartModel",
# VLM (from .vlm): config, data classes, and the model interface
"VLMConfig",
"VLMTask",
"VLMMessage",
"VLMResponse",
"DocumentQAResult",
"FieldExtractionVLMResult",
"VisionLanguageModel",
]
|