| """Tests for InferenceModelSpec.""" | |
| from __future__ import annotations | |
| import pytest | |
| from openg2g.datacenter.config import InferenceModelSpec | |
class TestInferenceModelSpec:
    """Unit tests for InferenceModelSpec construction, defaults, and validation."""

    def test_basic(self) -> None:
        """Constructor should store label, replica count, and GPUs per replica."""
        m = InferenceModelSpec(
            model_label="TestModel", num_replicas=10, gpus_per_replica=4, initial_batch_size=128, itl_deadline_s=0.1
        )
        assert m.model_label == "TestModel"
        assert m.num_replicas == 10
        assert m.gpus_per_replica == 4

    def test_default_batch_sizes(self) -> None:
        """Default feasible_batch_sizes should be (initial_batch_size,)."""
        # Two distinct values confirm the default tracks the given initial size
        # rather than a hard-coded constant.
        for batch_size in (128, 64):
            m = InferenceModelSpec(
                model_label="M",
                num_replicas=1,
                gpus_per_replica=1,
                initial_batch_size=batch_size,
                itl_deadline_s=0.1,
            )
            assert m.feasible_batch_sizes == (batch_size,)

    def test_custom_batch_sizes(self) -> None:
        """Explicit feasible_batch_sizes should be stored alongside initial_batch_size."""
        m = InferenceModelSpec(
            model_label="M",
            num_replicas=1,
            gpus_per_replica=1,
            initial_batch_size=16,
            itl_deadline_s=0.1,
            feasible_batch_sizes=(8, 16, 32, 64),
        )
        assert m.initial_batch_size == 16
        # Fix: previously this test never checked the custom sizes it passed in.
        assert m.feasible_batch_sizes == (8, 16, 32, 64)

    def test_itl_deadline(self) -> None:
        """ITL deadline should be stored as given."""
        m = InferenceModelSpec(
            model_label="M", num_replicas=1, gpus_per_replica=1, initial_batch_size=128, itl_deadline_s=0.08
        )
        assert m.itl_deadline_s == 0.08

    def test_initial_batch_not_in_feasible_raises(self) -> None:
        """initial_batch_size must be in feasible_batch_sizes when specified."""
        with pytest.raises(ValueError, match=r"initial_batch_size.*must be in.*feasible_batch_sizes"):
            InferenceModelSpec(
                model_label="M",
                num_replicas=1,
                gpus_per_replica=1,
                initial_batch_size=128,
                itl_deadline_s=0.1,
                feasible_batch_sizes=(8, 16, 32, 64),
            )

    def test_negative_replicas_raises(self) -> None:
        """Negative num_replicas should raise ValueError."""
        with pytest.raises(ValueError, match="num_replicas must be >= 0"):
            InferenceModelSpec(
                model_label="M", num_replicas=-1, gpus_per_replica=1, initial_batch_size=128, itl_deadline_s=0.1
            )

    def test_zero_gpus_per_replica_raises(self) -> None:
        """Zero gpus_per_replica should raise ValueError."""
        with pytest.raises(ValueError, match="gpus_per_replica must be >= 1"):
            InferenceModelSpec(
                model_label="M", num_replicas=1, gpus_per_replica=0, initial_batch_size=128, itl_deadline_s=0.1
            )

    def test_zero_initial_batch_raises(self) -> None:
        """Zero initial_batch_size should raise ValueError."""
        with pytest.raises(ValueError, match="initial_batch_size must be > 0"):
            InferenceModelSpec(
                model_label="M", num_replicas=1, gpus_per_replica=1, initial_batch_size=0, itl_deadline_s=0.1
            )

    def test_zero_itl_deadline_raises(self) -> None:
        """Zero itl_deadline_s should raise ValueError."""
        with pytest.raises(ValueError, match="itl_deadline_s must be > 0"):
            InferenceModelSpec(
                model_label="M", num_replicas=1, gpus_per_replica=1, initial_batch_size=128, itl_deadline_s=0.0
            )