"""
OCR Engine Factory

Provides convenient functions to create and manage OCR engines.
Handles fallback logic and singleton management.
"""

from typing import Optional, Dict

from loguru import logger

from .base import OCREngine, OCRConfig
from .paddle_ocr import PaddleOCREngine, HAS_PADDLEOCR
from .tesseract_ocr import TesseractOCREngine, HAS_TESSERACT

# Singleton instances for reuse, keyed by canonical engine name
# ("paddle" or "tesseract").
_ocr_engines: Dict[str, OCREngine] = {}

# Accepted alternative spellings mapped to their canonical engine name.
_ENGINE_ALIASES: Dict[str, str] = {"paddleocr": "paddle"}

# Raised when "auto" is requested but neither backend is importable.
_NO_ENGINE_MESSAGE = (
    "No OCR engine available. Install one of: "
    "pip install paddleocr paddlepaddle-gpu OR "
    "pip install pytesseract (+ apt-get install tesseract-ocr)"
)


def _resolve_engine_type(engine_type: str, log_selection: bool = False) -> str:
    """
    Turn a user-supplied engine name into a canonical one.

    Names are matched case-insensitively with surrounding whitespace
    ignored, aliases are mapped to their canonical name, and "auto" is
    resolved to the first available backend (PaddleOCR, then Tesseract).

    Args:
        engine_type: Requested engine name
        log_selection: Whether to log which engine "auto" resolved to

    Returns:
        Canonical engine name. Unrecognized names are returned normalized
        but otherwise untouched so the caller can reject them.

    Raises:
        RuntimeError: If "auto" is requested and no OCR engine is available
    """
    # Leave non-string input alone; create_ocr_engine rejects it with the
    # same ValueError it raises for any other unknown engine type.
    if not isinstance(engine_type, str):
        return engine_type

    name = engine_type.strip().lower()
    name = _ENGINE_ALIASES.get(name, name)
    if name != "auto":
        return name

    if HAS_PADDLEOCR:
        if log_selection:
            logger.info("Auto-selected PaddleOCR engine")
        return "paddle"
    if HAS_TESSERACT:
        if log_selection:
            logger.info("Auto-selected Tesseract engine")
        return "tesseract"
    raise RuntimeError(_NO_ENGINE_MESSAGE)


def create_ocr_engine(
    engine_type: str = "auto",
    config: Optional[OCRConfig] = None,
    initialize: bool = True,
) -> OCREngine:
    """
    Create an OCR engine instance.

    Always builds a new engine; use get_ocr_engine() for a shared instance.

    Args:
        engine_type: Engine type: "paddle", "paddleocr", "tesseract", or
            "auto" (case-insensitive)
        config: OCR configuration; a default OCRConfig() is used when None
        initialize: Whether to initialize the engine immediately

    Returns:
        OCREngine instance

    Raises:
        RuntimeError: If no OCR engine is available, or the requested
            engine's backend is not installed
        ValueError: If engine_type is not a recognized engine name
    """
    if config is None:
        config = OCRConfig()

    resolved = _resolve_engine_type(engine_type, log_selection=True)

    if resolved == "paddle":
        if not HAS_PADDLEOCR:
            raise RuntimeError(
                "PaddleOCR not installed. Install with: "
                "pip install paddleocr paddlepaddle-gpu"
            )
        engine = PaddleOCREngine(config)
    elif resolved == "tesseract":
        if not HAS_TESSERACT:
            raise RuntimeError(
                "Tesseract not installed. Install with: "
                "pip install pytesseract (+ apt-get install tesseract-ocr)"
            )
        engine = TesseractOCREngine(config)
    else:
        # Report the name exactly as the caller supplied it.
        raise ValueError(f"Unknown engine type: {engine_type}")

    if initialize:
        engine.initialize()

    return engine


def get_ocr_engine(
    engine_type: str = "auto",
    config: Optional[OCRConfig] = None,
) -> OCREngine:
    """
    Get or create an OCR engine singleton.

    Reuses existing engine instances for efficiency. One instance is cached
    per canonical engine name, so "paddle" and "paddleocr" share an entry.

    Args:
        engine_type: Engine type: "paddle", "paddleocr", "tesseract", or
            "auto" (case-insensitive)
        config: OCR configuration (only used for new instances)

    Returns:
        OCREngine instance

    Raises:
        RuntimeError: If no OCR engine is available, or the requested
            engine's backend is not installed
        ValueError: If engine_type is not a recognized engine name
    """
    resolved = _resolve_engine_type(engine_type)

    cached = _ocr_engines.get(resolved)
    if cached is not None:
        if config is not None:
            # The cached engine keeps the config it was created with; make
            # the dropped argument visible instead of ignoring it silently.
            logger.debug(
                "Reusing cached '{}' OCR engine; supplied config is ignored",
                resolved,
            )
        return cached

    engine = create_ocr_engine(resolved, config, initialize=True)
    _ocr_engines[resolved] = engine
    return engine


def get_available_engines() -> Dict[str, bool]:
    """
    Get availability status of OCR engines.

    Returns:
        Dict mapping engine name to availability
    """
    return {
        "paddle": HAS_PADDLEOCR,
        "tesseract": HAS_TESSERACT,
    }


def clear_engines() -> None:
    """Clear all cached OCR engine instances."""
    _ocr_engines.clear()
    logger.debug("Cleared OCR engine cache")


class OCREngineManager:
    """
    Context manager for OCR engine lifecycle.

    Example:
        with OCREngineManager("paddle") as engine:
            result = engine.recognize(image)
    """

    def __init__(
        self,
        engine_type: str = "auto",
        config: Optional[OCRConfig] = None,
        use_singleton: bool = True,
    ):
        """
        Initialize OCR engine manager.

        No engine is created here; that happens on entering the context.

        Args:
            engine_type: Engine type
            config: OCR configuration
            use_singleton: Whether to use singleton instance
        """
        self.engine_type = engine_type
        self.config = config
        self.use_singleton = use_singleton
        self._engine: Optional[OCREngine] = None
        # True only when this manager created a private (non-cached) engine.
        self._owned = False

    def __enter__(self) -> OCREngine:
        """Enter context and return engine."""
        if self.use_singleton:
            self._engine = get_ocr_engine(self.engine_type, self.config)
            self._owned = False
        else:
            self._engine = create_ocr_engine(self.engine_type, self.config)
            self._owned = True
        return self._engine

    def __exit__(self, exc_type, exc_val, exc_tb) -> bool:
        """Exit context, dropping the reference to the engine."""
        # Singletons stay in the module cache and must not be torn down.
        # Owned engines get no explicit cleanup either; if a teardown hook
        # is added to OCREngine, call it here when self._owned is True.
        self._engine = None
        # Never suppress exceptions raised inside the with-block.
        return False