""" Document Intelligence IO Module Document loading, rendering, and caching: - PDF loading with PyMuPDF - Image loading (JPEG, PNG, TIFF) - Page rendering at configurable DPI - File-based caching with LRU eviction """ from .base import ( # Format detection DocumentFormat, # Metadata PageInfo, DocumentInfo, # Options RenderOptions, # Base classes DocumentLoader, PageRenderer, DocumentProcessor, ) from .pdf import ( PDFLoader, PDFRenderer, PDFTextExtractor, load_pdf, ) from .image import ( ImageLoader, ImageRenderer, load_image, ) from .cache import ( CacheConfig, CacheEntry, DocumentCache, get_document_cache, cached_page, ) __all__ = [ # Format "DocumentFormat", # Metadata "PageInfo", "DocumentInfo", "RenderOptions", # Base "DocumentLoader", "PageRenderer", "DocumentProcessor", # PDF "PDFLoader", "PDFRenderer", "PDFTextExtractor", "load_pdf", # Image "ImageLoader", "ImageRenderer", "load_image", # Cache "CacheConfig", "CacheEntry", "DocumentCache", "get_document_cache", "cached_page", ] def load_document(path): """ Load a document based on its format. Auto-detects format from file extension. Args: path: Path to document file Returns: Tuple of (loader, renderer) """ from pathlib import Path as PathLib path = PathLib(path) fmt = DocumentFormat.from_path(path) if fmt == DocumentFormat.PDF: return load_pdf(path) elif fmt in {DocumentFormat.IMAGE, DocumentFormat.TIFF_MULTIPAGE}: return load_image(path) else: raise ValueError(f"Unsupported document format: {path.suffix}")