File size: 2,884 Bytes
d520909
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""
SPARKNET Document Intelligence

Vision-first agentic document understanding platform.

Modules:
- chunks: Core data models (BoundingBox, DocumentChunk, EvidenceRef, etc.)
- io: Document loading and rendering (PDF, images)
- models: Pluggable model interfaces (OCR, Layout, Table, Chart, VLM)
- parsing: Document parsing and semantic chunking
- grounding: Visual evidence and cropping utilities
- extraction: Schema-driven field extraction
- validation: Result validation and confidence scoring
- tools: Agent tool implementations

Names from chunks, io, parsing, grounding and extraction are re-exported at
package level (see ``__all__``); models, validation and tools are not imported
here and have to be imported from their own submodules.
"""

from .chunks import (
    # Bounding box
    BoundingBox,
    # Chunk types
    ChunkType,
    ConfidenceLevel,
    # Base chunks
    DocumentChunk,
    # Specialized chunks
    TableCell,
    TableChunk,
    ChartDataPoint,
    ChartChunk,
    FormFieldChunk,
    # Evidence
    EvidenceRef,
    # Parse results
    PageResult,
    ParseResult,
    # Extraction
    FieldExtraction,
    ExtractionResult,
    # Classification
    DocumentType,
    ClassificationResult,
)

from .io import (
    DocumentFormat,
    PageInfo,
    DocumentInfo,
    RenderOptions,
    load_document,
    load_pdf,
    load_image,
    get_document_cache,
)

from .parsing import (
    ParserConfig,
    DocumentParser,
    parse_document,
    SemanticChunker,
    ChunkingConfig,
)

from .grounding import (
    EvidenceBuilder,
    EvidenceTracker,
    CropManager,
    crop_region,
    crop_chunk,
    create_annotated_image,
    highlight_region,
)

from .extraction import (
    FieldType,
    FieldSpec,
    ExtractionSchema,
    ExtractionConfig,
    FieldExtractor,
    ExtractionValidator,
    ValidationResult,
    # Pre-built schemas
    create_invoice_schema,
    create_receipt_schema,
    create_contract_schema,
)

# Package version string; it is also listed in ``__all__`` below.
__version__ = "0.1.0"

# Explicit public API of the package, i.e. the names bound by a star-import.
# Entries are grouped by the submodule they are imported from at the top of
# this file and follow the same order as those import statements; keep the
# two in sync when adding or removing a name.
__all__ = [
    # Version
    "__version__",
    # Chunks (from .chunks): data models
    "BoundingBox",
    "ChunkType",
    "ConfidenceLevel",
    "DocumentChunk",
    "TableCell",
    "TableChunk",
    "ChartDataPoint",
    "ChartChunk",
    "FormFieldChunk",
    "EvidenceRef",
    "PageResult",
    "ParseResult",
    "FieldExtraction",
    "ExtractionResult",
    "DocumentType",
    "ClassificationResult",
    # IO (from .io): document loading and rendering
    "DocumentFormat",
    "PageInfo",
    "DocumentInfo",
    "RenderOptions",
    "load_document",
    "load_pdf",
    "load_image",
    "get_document_cache",
    # Parsing (from .parsing): parsing and semantic chunking
    "ParserConfig",
    "DocumentParser",
    "parse_document",
    "SemanticChunker",
    "ChunkingConfig",
    # Grounding (from .grounding): evidence and cropping utilities
    "EvidenceBuilder",
    "EvidenceTracker",
    "CropManager",
    "crop_region",
    "crop_chunk",
    "create_annotated_image",
    "highlight_region",
    # Extraction (from .extraction): schemas, extractor, validator and the
    # pre-built schema factories
    "FieldType",
    "FieldSpec",
    "ExtractionSchema",
    "ExtractionConfig",
    "FieldExtractor",
    "ExtractionValidator",
    "ValidationResult",
    "create_invoice_schema",
    "create_receipt_schema",
    "create_contract_schema",
]