Spaces:
Running
Running
| """ | |
| LITVISION Recommendation API — Utility Module | |
| =============================================== | |
| Production logging, device management, CUDA OOM handling, | |
| and temp/cache cleanup helpers. | |
| """ | |
| import os | |
| import gc | |
| import logging | |
| import shutil | |
| from typing import Optional | |
| import torch | |
| # --------------------------------------------------------------------------- | |
| # Logging | |
| # --------------------------------------------------------------------------- | |
# Record layout handed to logging.basicConfig(): pipe-separated timestamp,
# level name (left-aligned, padded to 8 characters), logger name and message.
LOG_FORMAT = (
    "%(asctime)s | "
    "%(levelname)-8s | "
    "%(name)s | "
    "%(message)s"
)

# strftime pattern used to render %(asctime)s (date, space, 24-hour time).
LOG_DATE_FORMAT = (
    "%Y-%m-%d "
    "%H:%M:%S"
)
def setup_logging(level: int = logging.INFO) -> None:
    """Install the root logging configuration and quiet noisy libraries.

    Args:
        level: Threshold applied to the root logger (default ``logging.INFO``).
    """
    root_config = {
        "level": level,
        "format": LOG_FORMAT,
        "datefmt": LOG_DATE_FORMAT,
        # force=True makes basicConfig replace any handlers that are already
        # attached to the root logger instead of silently doing nothing.
        "force": True,
    }
    logging.basicConfig(**root_config)

    # Raise the threshold of verbose third-party loggers to WARNING.
    chatty_loggers = ("transformers", "sentence_transformers", "faiss", "urllib3")
    for logger_name in chatty_loggers:
        third_party = logging.getLogger(logger_name)
        third_party.setLevel(logging.WARNING)
# Module-level logger; the helpers in this file emit their records through it.
logger = logging.getLogger(
    "litvision.recommendation",
)
| # --------------------------------------------------------------------------- | |
| # Device helpers | |
| # --------------------------------------------------------------------------- | |
def get_device() -> str:
    """Return the best available torch device string.

    Returns:
        ``"cuda"`` when ``torch.cuda.is_available()`` is true, otherwise
        ``"cpu"``. The outcome is logged at INFO level; for CUDA the log line
        includes the name and total memory (in GiB) of device 0.
    """
    if not torch.cuda.is_available():
        logger.info("No CUDA device — running on CPU")
        return "cpu"

    gpu_name = torch.cuda.get_device_name(0)
    # The device-properties object exposes its capacity in bytes as
    # ``total_memory``; there is no ``total_mem`` attribute.
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    mem = total_bytes / (1024 ** 3)
    logger.info(f"CUDA device detected: {gpu_name} ({mem:.1f} GB)")
    return "cuda"
def safe_cuda_empty_cache() -> None:
    """Clear CUDA cache if available; silently no-op on CPU.

    When CUDA is available, runs a Python garbage collection, releases the
    unused blocks held by torch's CUDA caching allocator, and logs the action
    at INFO level. Without CUDA nothing is done and nothing is logged.
    """
    if not torch.cuda.is_available():
        return

    # Collect first: tensors that are only kept alive by reference cycles must
    # be freed before empty_cache() can hand their memory back to the driver.
    gc.collect()
    torch.cuda.empty_cache()
    logger.info("CUDA cache cleared")
def handle_cuda_oom(exc: Exception) -> str:
    """Recover from a CUDA out-of-memory error and describe it to the user.

    Args:
        exc: The exception that signalled the out-of-memory condition; it is
            only used for the error log entry.

    Returns:
        A fixed, user-facing message asking for a retry with a smaller request.
    """
    # Release cached GPU memory before reporting the failure.
    safe_cuda_empty_cache()
    logger.error(f"CUDA OOM: {exc}")
    return (
        "GPU out of memory during recommendation generation. "
        "The CUDA cache has been cleared. Please retry with a smaller request."
    )
| # --------------------------------------------------------------------------- | |
| # Temp / cache cleanup | |
| # --------------------------------------------------------------------------- | |
# Base directories scanned by cleanup_temp_files(). HF_HOME is used when it is
# set in the environment; otherwise the /tmp/huggingface fallback applies.
_TEMP_DIRS = [
    os.environ.get("HF_HOME", "/tmp/huggingface"),
]


def cleanup_temp_files() -> None:
    """Remove transient cache artefacts that are safe to delete.

    For every base directory in ``_TEMP_DIRS`` the ``hub/.locks`` subdirectory
    is deleted when it exists. A successful removal is logged at INFO level;
    an OS-level failure is logged as a warning and never raised.
    """
    for base_dir in _TEMP_DIRS:
        cache_dir = os.path.join(base_dir, "hub", ".locks")
        if not os.path.isdir(cache_dir):
            continue
        try:
            # Deliberately no ignore_errors=True: that flag swallows every
            # failure, which would make the except branch unreachable and
            # report a clean-up that did not actually happen.
            shutil.rmtree(cache_dir)
        except OSError as e:
            logger.warning(f"Could not clean {cache_dir}: {e}")
        else:
            logger.info(f"Cleaned lock dir: {cache_dir}")
| # --------------------------------------------------------------------------- | |
| # Validation helpers | |
| # --------------------------------------------------------------------------- | |
def validate_positive_int(value: int, name: str, max_val: Optional[int] = None) -> int:
    """Ensure *value* is a positive integer, optionally capped at *max_val*.

    Args:
        value: Candidate to validate.
        name: Label used for the value in error messages.
        max_val: Inclusive upper bound; ``None`` disables the cap.

    Returns:
        *value* unchanged when it passes every check.

    Raises:
        ValueError: If *value* is not an ``int`` (booleans are rejected), is
            smaller than 1, or exceeds *max_val*.
    """
    # bool is a subclass of int, so True would otherwise be accepted as 1.
    is_real_int = isinstance(value, int) and not isinstance(value, bool)
    if not is_real_int or value < 1:
        raise ValueError(f"{name} must be a positive integer, got {value!r}")
    if max_val is not None and value > max_val:
        raise ValueError(f"{name} must be ≤ {max_val}, got {value}")
    return value