MHamdan's picture
Initial commit: SPARKNET framework
d520909
"""
SPARKNET Document Intelligence

Vision-first agentic document understanding platform.

Modules:
- chunks: Core data models (BoundingBox, DocumentChunk, EvidenceRef, etc.)
- io: Document loading and rendering (PDF, images)
- models: Pluggable model interfaces (OCR, Layout, Table, Chart, VLM)
- parsing: Document parsing and semantic chunking
- grounding: Visual evidence and cropping utilities
- extraction: Schema-driven field extraction
- validation: Result validation and confidence scoring
- tools: Agent tool implementations

Selected names from chunks, io, parsing, grounding and extraction are
re-exported at package level (see ``__all__``); this file does not import
models, validation or tools.
"""
from .chunks import (
# Bounding box
BoundingBox,
# Chunk types
ChunkType,
ConfidenceLevel,
# Base chunks
DocumentChunk,
# Specialized chunks
TableCell,
TableChunk,
ChartDataPoint,
ChartChunk,
FormFieldChunk,
# Evidence
EvidenceRef,
# Parse results
PageResult,
ParseResult,
# Extraction
FieldExtraction,
ExtractionResult,
# Classification
DocumentType,
ClassificationResult,
)
from .io import (
DocumentFormat,
PageInfo,
DocumentInfo,
RenderOptions,
load_document,
load_pdf,
load_image,
get_document_cache,
)
from .parsing import (
ParserConfig,
DocumentParser,
parse_document,
SemanticChunker,
ChunkingConfig,
)
from .grounding import (
EvidenceBuilder,
EvidenceTracker,
CropManager,
crop_region,
crop_chunk,
create_annotated_image,
highlight_region,
)
from .extraction import (
FieldType,
FieldSpec,
ExtractionSchema,
ExtractionConfig,
FieldExtractor,
ExtractionValidator,
ValidationResult,
# Pre-built schemas
create_invoice_schema,
create_receipt_schema,
create_contract_schema,
)
# Package version string; also exported through __all__ below.
__version__ = "0.1.0"
# Public API of the package: the names a star-import of this package exposes.
# Apart from "__version__" (defined just above), every entry is a name bound
# by one of the relative imports at the top of this file, grouped here by the
# submodule it is imported from.
__all__ = [
# Version
"__version__",
# Chunks: data models imported from .chunks
"BoundingBox",
"ChunkType",
"ConfidenceLevel",
"DocumentChunk",
"TableCell",
"TableChunk",
"ChartDataPoint",
"ChartChunk",
"FormFieldChunk",
"EvidenceRef",
"PageResult",
"ParseResult",
"FieldExtraction",
"ExtractionResult",
"DocumentType",
"ClassificationResult",
# IO: document descriptors and loaders imported from .io
"DocumentFormat",
"PageInfo",
"DocumentInfo",
"RenderOptions",
"load_document",
"load_pdf",
"load_image",
"get_document_cache",
# Parsing: parser and chunker names imported from .parsing
"ParserConfig",
"DocumentParser",
"parse_document",
"SemanticChunker",
"ChunkingConfig",
# Grounding: evidence and cropping names imported from .grounding
"EvidenceBuilder",
"EvidenceTracker",
"CropManager",
"crop_region",
"crop_chunk",
"create_annotated_image",
"highlight_region",
# Extraction: imported from .extraction. Note that ExtractionValidator and
# ValidationResult are also imported from .extraction in this file, not from
# a separate validation module.
"FieldType",
"FieldSpec",
"ExtractionSchema",
"ExtractionConfig",
"FieldExtractor",
"ExtractionValidator",
"ValidationResult",
# Pre-built schema factories (same .extraction import)
"create_invoice_schema",
"create_receipt_schema",
"create_contract_schema",
]