# MHamdan's picture
# Initial commit: SPARKNET framework
# d520909
"""
Document Intelligence Model Interfaces.

Pluggable model interfaces for document understanding:
- OCRModel: Text recognition
- LayoutModel: Layout detection
- ReadingOrderModel: Reading order determination
- TableModel: Table structure extraction
- ChartModel: Chart/graph understanding
- VisionLanguageModel: Multimodal understanding
"""
from .base import (
# Base classes
BaseModel,
BatchableModel,
# Configuration
ModelConfig,
ModelMetadata,
ModelCapability,
# Utilities
ImageInput,
normalize_image_input,
ensure_pil_image,
)
from .ocr import (
# Config
OCRConfig,
OCREngine,
# Data classes
OCRWord,
OCRLine,
OCRBlock,
OCRResult,
# Model interface
OCRModel,
)
from .layout import (
# Config
LayoutConfig,
# Data classes
LayoutRegionType,
LayoutRegion,
LayoutResult,
# Model interfaces
LayoutModel,
ReadingOrderModel,
HeuristicReadingOrderModel,
)
from .table import (
# Config
TableConfig,
# Data classes
TableCellType,
TableStructure,
TableExtractionResult,
# Model interface
TableModel,
)
from .chart import (
# Config
ChartConfig,
# Data classes
ChartType,
AxisInfo,
LegendItem,
DataSeries,
TrendInfo,
ChartStructure,
ChartExtractionResult,
# Model interface
ChartModel,
)
from .vlm import (
# Config
VLMConfig,
VLMTask,
# Data classes
VLMMessage,
VLMResponse,
DocumentQAResult,
FieldExtractionVLMResult,
# Model interface
VisionLanguageModel,
)
# Public API of this package: every name re-exported from the sub-modules
# imported above. The list is declared one sub-module at a time (always with
# literal lists, so static tools can still resolve it); order is preserved.

# .base -- base classes, configuration objects and image-input utilities
__all__ = [
    "BaseModel",
    "BatchableModel",
    "ModelConfig",
    "ModelMetadata",
    "ModelCapability",
    "ImageInput",
    "normalize_image_input",
    "ensure_pil_image",
]

# .ocr -- config, data classes and the OCR model interface
__all__ += [
    "OCRConfig",
    "OCREngine",
    "OCRWord",
    "OCRLine",
    "OCRBlock",
    "OCRResult",
    "OCRModel",
]

# .layout -- config, data classes, layout and reading-order model interfaces
__all__ += [
    "LayoutConfig",
    "LayoutRegionType",
    "LayoutRegion",
    "LayoutResult",
    "LayoutModel",
    "ReadingOrderModel",
    "HeuristicReadingOrderModel",
]

# .table -- config, data classes and the table model interface
__all__ += [
    "TableConfig",
    "TableCellType",
    "TableStructure",
    "TableExtractionResult",
    "TableModel",
]

# .chart -- config, data classes and the chart model interface
__all__ += [
    "ChartConfig",
    "ChartType",
    "AxisInfo",
    "LegendItem",
    "DataSeries",
    "TrendInfo",
    "ChartStructure",
    "ChartExtractionResult",
    "ChartModel",
]

# .vlm -- config, data classes and the vision-language model interface
__all__ += [
    "VLMConfig",
    "VLMTask",
    "VLMMessage",
    "VLMResponse",
    "DocumentQAResult",
    "FieldExtractionVLMResult",
    "VisionLanguageModel",
]