File size: 2,884 Bytes
d520909 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
"""
SPARKNET Document Intelligence
Vision-first agentic document understanding platform.
Modules:
- chunks: Core data models (BoundingBox, DocumentChunk, EvidenceRef, etc.)
- io: Document loading and rendering (PDF, images)
- models: Pluggable model interfaces (OCR, Layout, Table, Chart, VLM)
- parsing: Document parsing and semantic chunking
- grounding: Visual evidence and cropping utilities
- extraction: Schema-driven field extraction
- validation: Result validation and confidence scoring
- tools: Agent tool implementations
"""
from .chunks import (
# Bounding box
BoundingBox,
# Chunk types
ChunkType,
ConfidenceLevel,
# Base chunks
DocumentChunk,
# Specialized chunks
TableCell,
TableChunk,
ChartDataPoint,
ChartChunk,
FormFieldChunk,
# Evidence
EvidenceRef,
# Parse results
PageResult,
ParseResult,
# Extraction
FieldExtraction,
ExtractionResult,
# Classification
DocumentType,
ClassificationResult,
)
from .io import (
DocumentFormat,
PageInfo,
DocumentInfo,
RenderOptions,
load_document,
load_pdf,
load_image,
get_document_cache,
)
from .parsing import (
ParserConfig,
DocumentParser,
parse_document,
SemanticChunker,
ChunkingConfig,
)
from .grounding import (
EvidenceBuilder,
EvidenceTracker,
CropManager,
crop_region,
crop_chunk,
create_annotated_image,
highlight_region,
)
from .extraction import (
FieldType,
FieldSpec,
ExtractionSchema,
ExtractionConfig,
FieldExtractor,
ExtractionValidator,
ValidationResult,
# Pre-built schemas
create_invoice_schema,
create_receipt_schema,
create_contract_schema,
)
# Package version string; it is also listed in ``__all__`` below.
__version__ = "0.1.0"

# Public API of the package. The list is assembled one section at a time,
# in the same order as the re-export imports at the top of this file.
__all__ = ["__version__"]

# Names re-exported from .chunks
__all__ += [
    "BoundingBox",
    "ChunkType",
    "ConfidenceLevel",
    "DocumentChunk",
    "TableCell",
    "TableChunk",
    "ChartDataPoint",
    "ChartChunk",
    "FormFieldChunk",
    "EvidenceRef",
    "PageResult",
    "ParseResult",
    "FieldExtraction",
    "ExtractionResult",
    "DocumentType",
    "ClassificationResult",
]

# Names re-exported from .io
__all__ += [
    "DocumentFormat",
    "PageInfo",
    "DocumentInfo",
    "RenderOptions",
    "load_document",
    "load_pdf",
    "load_image",
    "get_document_cache",
]

# Names re-exported from .parsing
__all__ += [
    "ParserConfig",
    "DocumentParser",
    "parse_document",
    "SemanticChunker",
    "ChunkingConfig",
]

# Names re-exported from .grounding
__all__ += [
    "EvidenceBuilder",
    "EvidenceTracker",
    "CropManager",
    "crop_region",
    "crop_chunk",
    "create_annotated_image",
    "highlight_region",
]

# Names re-exported from .extraction (including the pre-built schema factories)
__all__ += [
    "FieldType",
    "FieldSpec",
    "ExtractionSchema",
    "ExtractionConfig",
    "FieldExtractor",
    "ExtractionValidator",
    "ValidationResult",
    "create_invoice_schema",
    "create_receipt_schema",
    "create_contract_schema",
]
|