"""
Thread-safe singleton for AlignedEmbedder.
When running experiments with --parallel N, each thread would normally
create its own AlignedEmbedder, loading CLIP + CLAP models redundantly.
This module provides a shared, thread-safe instance that all threads use.
Usage:
from src.embeddings.shared_embedder import get_shared_embedder
embedder = get_shared_embedder() # Returns the same instance every time
"""

from __future__ import annotations

import threading
from typing import Optional

from src.embeddings.aligned_embeddings import AlignedEmbedder
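
# Module-level singleton state: the lock guards first-time construction,
# and _instance holds the one shared embedder once it exists.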
_lock = threading.Lock()
_instance: Optional[AlignedEmbedder] = None


def get_shared_embedder(
    target_dim: int = 512,
    enable_cache: bool = True,
    cache_dir: str = ".cache/embeddings",
) -> AlignedEmbedder:
    """
    Get or create the shared AlignedEmbedder instance.

    Thread-safe: uses double-checked locking. The first call creates the
    instance; subsequent calls return the same object immediately.

    The underlying CLIP/CLAP models are read-only at inference time, so
    sharing across threads is safe. The EmbeddingCache uses file-based
    storage which handles concurrent access.

    Args:
        target_dim: Embedding dimension (only used on first call)
        enable_cache: Whether to enable disk caching (only used on first call)
        cache_dir: Cache directory path (only used on first call)

    Returns:
        Shared AlignedEmbedder instance
    """
    global _instance

    if _instance is not None:
        return _instance

    with _lock:
        # Double-checked locking
        if _instance is not None:
            return _instance
        _instance = AlignedEmbedder(
            target_dim=target_dim,
            enable_cache=enable_cache,
            cache_dir=cache_dir,
        )
    return _instance


def reset_shared_embedder() -> None:
    """
    Reset the singleton (for testing or reconfiguration).

    The assignment itself is guarded by the lock, but call this only when
    no other threads are still using the embedder.
    """
    global _instance
    with _lock:
        _instance = None
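

# A minimal, illustrative usage sketch (not part of the module's public API):
# it assumes AlignedEmbedder loads its models on construction, so running it
# is as expensive as one normal startup. It simply checks that every worker
# thread receives the identical shared instance.
if __name__ == "__main__":  # pragma: no cover
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=4) as pool:
        embedders = list(pool.map(lambda _: get_shared_embedder(), range(8)))

    # Every call, from every worker thread, returned the same object,
    # so CLIP + CLAP were loaded at most once.
    assert all(e is embedders[0] for e in embedders)

    # Reset afterwards so anything importing this module later starts clean.
    reset_shared_embedder()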