"""
Thread-safe singleton for AlignedEmbedder.

When running experiments with --parallel N, each thread would normally
create its own AlignedEmbedder, loading CLIP + CLAP models redundantly.
This module provides a shared, thread-safe instance that all threads use.

Usage:
    from src.embeddings.shared_embedder import get_shared_embedder
    embedder = get_shared_embedder()  # Returns the same instance every time
"""
| |
|
| | from __future__ import annotations |
| |
|
| | import threading |
| | from typing import Optional |
| |
|
| | from src.embeddings.aligned_embeddings import AlignedEmbedder |
| |
|
# Module-wide singleton state. `_instance` stays None until the first
# get_shared_embedder() call builds the embedder; `_lock` serialises that
# construction (and any reset) across threads.
_instance: Optional[AlignedEmbedder] = None
_lock = threading.Lock()
| |
|
| |
|
def get_shared_embedder(
    target_dim: int = 512,
    enable_cache: bool = True,
    cache_dir: str = ".cache/embeddings",
) -> AlignedEmbedder:
    """
    Get or create the shared AlignedEmbedder instance.

    Thread-safe: uses double-checked locking. The first call constructs the
    instance while holding the module lock; subsequent calls return the same
    object without acquiring the lock.

    The arguments are only honoured by the call that actually constructs the
    instance. Later calls return the existing embedder unchanged, even if
    they pass different values.

    NOTE(review): sharing one instance assumes AlignedEmbedder and its
    embedding cache tolerate concurrent use from several threads (the
    original author states the CLIP/CLAP models are read-only at inference
    time and the cache is file-based) -- confirm against AlignedEmbedder.

    Args:
        target_dim: Embedding dimension (only used on first call)
        enable_cache: Whether to enable disk caching (only used on first call)
        cache_dir: Cache directory path (only used on first call)

    Returns:
        Shared AlignedEmbedder instance
    """
    global _instance

    # Snapshot the global exactly once. Checking `_instance` and then
    # returning `_instance` are two separate reads; a concurrent
    # reset_shared_embedder() between them would make this return None.
    instance = _instance
    if instance is not None:
        return instance

    with _lock:
        # Second check: another thread may have built the embedder while
        # this one was waiting for the lock.
        instance = _instance
        if instance is None:
            instance = AlignedEmbedder(
                target_dim=target_dim,
                enable_cache=enable_cache,
                cache_dir=cache_dir,
            )
            _instance = instance

    # Return the local reference, never a fresh read of the global.
    return instance
| |
|
| |
|
def reset_shared_embedder() -> None:
    """
    Drop the shared embedder (for testing or reconfiguration).

    After this call the next get_shared_embedder() constructs a new
    instance. The reset itself runs under the module lock, but it does not
    affect references already handed out: threads holding the previous
    embedder keep using it. Call this only when no other threads are using
    the embedder.
    """
    global _instance

    # Hold the lock so the reset cannot interleave with a construction in
    # progress inside get_shared_embedder().
    with _lock:
        _instance = None
| |
|