"""Modal executor — skeleton for v0.

This file is a STUB. The full Modal integration requires the `modal` client
library installed (`pip install modal`) and a configured Modal account
(`~/.modal.toml`). The user's environment has both, but the test suite must
run without them, so we keep this file import-safe.

Real implementation lives in v0 polish; the docstring below is the contract.
"""
from __future__ import annotations

from typing import Any, Callable, Mapping

from composer_replication.diloco.serverless.executor import (
    ReplicaHandle,
    ServerlessExecutor,
)


class ModalExecutor(ServerlessExecutor):
    """Run replicas as Modal Functions in parallel.

    Reference implementation pattern (per ADR-005):

        @app.function(gpu="A100-40GB", timeout=3600)
        def run_replica(rank: int, rendezvous_uri: str, **kwargs):
            os.environ["REPLICA_RANK"] = str(rank)
            from composer_replication.diloco.serverless import (
                MockManager, ObjectStoreAllReduce,
            )
            store = ObjectStoreAllReduce(rendezvous_uri, rank=rank, world_size=N)
            manager = MockManager(store)
            # ... run the trainer with this manager ...

    Then `launch_replicas` does:

        calls = [run_replica.spawn(rank=i, ...) for i in range(N)]
        return [ReplicaHandle(rank=i, backend_name="modal",
                              metadata={"call_id": calls[i].object_id})
                for i in range(N)]

    Pricing reference (2026-05-26): A100-40GB ≈ $1.95/hr, H100 ≈ $5.50/hr.
    Cold start ≈ 30s. Inter-job networking via cluster mode (opt-in, not
    used by default).

    Status: SKELETON. Real implementation pending v0 polish wave.
    """

    backend_name = "modal"
    supports_inter_replica_network = False  # default; cluster mode = True

    def __init__(self, *, app_name: str = "composer-replication-diloco") -> None:
        """Check that the `modal` client is importable, then refuse to build.

        The import is done lazily here (not at module top level) so that
        importing this module never requires `modal` to be installed.

        Args:
            app_name: Name stored on ``self.app_name`` before the skeleton
                bails out.

        Raises:
            RuntimeError: If `modal` cannot be imported. The original
                ``ImportError`` is attached as ``__cause__``.
            NotImplementedError: Always, when `modal` is importable — the
                skeleton never instantiates successfully.
        """
        try:
            import modal  # noqa: F401
        except ImportError as e:
            # Chain explicitly so the traceback marks the ImportError as the
            # direct cause rather than an incidental context.
            raise RuntimeError(
                "ModalExecutor requires the modal client. Install with "
                "`pip install modal` and configure with `modal token new`. \n"
                "Got: " + repr(e)
            ) from e
        self.app_name = app_name
        # Real implementation: build a `modal.App` and register `run_replica`
        # here so that subsequent `launch_replicas` can `.spawn()` it.
        raise NotImplementedError(
            "ModalExecutor is a v0 skeleton; full implementation pending. "
            "Use LocalProcessExecutor for testing."
        )

    # All Protocol methods raise NotImplementedError via __init__ — the
    # class never instantiates successfully in the skeleton. Sketch
    # signatures here for documentation:

    def launch_replicas(
        self,
        n_replicas: int,
        entrypoint: str | Callable[..., Any],
        entrypoint_args: Mapping[str, Any],
        *,
        gpu: str | None = "A100-40GB",
        timeout: int = 3600,
    ) -> list[ReplicaHandle]:
        """Signature sketch only; always raises ``NotImplementedError``.

        Intended contract (see the class docstring): spawn one Modal call
        per rank and return one ``ReplicaHandle`` per replica.
        """
        raise NotImplementedError

    def poll(self, handle: ReplicaHandle) -> str:
        """Signature sketch only; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def stream_logs(self, handle: ReplicaHandle, *, n_lines: int = 200) -> str:
        """Signature sketch only; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def cancel(self, handle: ReplicaHandle) -> None:
        """Signature sketch only; always raises ``NotImplementedError``."""
        raise NotImplementedError

    def collect(
        self,
        handles: list[ReplicaHandle],
        *,
        timeout: int | None = None,
    ) -> list[dict[str, Any]]:
        """Signature sketch only; always raises ``NotImplementedError``."""
        raise NotImplementedError


__all__ = ["ModalExecutor"]