live / tests /test_model_spec.py
github-actions[bot]
deploy: sync from GitHub 2026-04-18T00:48:45Z
96bb363
"""Tests for InferenceModelSpec."""
from __future__ import annotations
import pytest
from openg2g.datacenter.config import InferenceModelSpec
class TestInferenceModelSpec:
    """Unit tests for the InferenceModelSpec configuration object.

    Covers attribute storage, the defaulting of ``feasible_batch_sizes``,
    and each validation rule that should raise ``ValueError``.
    """

    def test_basic(self) -> None:
        """Constructor should store label, replica count, and GPUs per replica."""
        m = InferenceModelSpec(
            model_label="TestModel", num_replicas=10, gpus_per_replica=4, initial_batch_size=128, itl_deadline_s=0.1
        )
        assert m.model_label == "TestModel"
        assert m.num_replicas == 10
        assert m.gpus_per_replica == 4

    def test_default_batch_sizes(self) -> None:
        """Default feasible_batch_sizes should be (initial_batch_size,)."""
        m = InferenceModelSpec(
            model_label="M", num_replicas=1, gpus_per_replica=1, initial_batch_size=128, itl_deadline_s=0.1
        )
        assert m.feasible_batch_sizes == (128,)
        m2 = InferenceModelSpec(
            model_label="M", num_replicas=1, gpus_per_replica=1, initial_batch_size=64, itl_deadline_s=0.1
        )
        assert m2.feasible_batch_sizes == (64,)

    def test_custom_batch_sizes(self) -> None:
        """An explicit feasible_batch_sizes tuple should be stored as given."""
        m = InferenceModelSpec(
            model_label="M",
            num_replicas=1,
            gpus_per_replica=1,
            initial_batch_size=16,
            itl_deadline_s=0.1,
            feasible_batch_sizes=(8, 16, 32, 64),
        )
        assert m.initial_batch_size == 16
        # The custom tuple must round-trip unchanged (previously untested).
        assert m.feasible_batch_sizes == (8, 16, 32, 64)

    def test_itl_deadline(self) -> None:
        """ITL deadline should be stored as given."""
        m = InferenceModelSpec(
            model_label="M", num_replicas=1, gpus_per_replica=1, initial_batch_size=128, itl_deadline_s=0.08
        )
        assert m.itl_deadline_s == 0.08

    def test_initial_batch_not_in_feasible_raises(self) -> None:
        """initial_batch_size must be in feasible_batch_sizes when specified."""
        with pytest.raises(ValueError, match=r"initial_batch_size.*must be in.*feasible_batch_sizes"):
            InferenceModelSpec(
                model_label="M",
                num_replicas=1,
                gpus_per_replica=1,
                initial_batch_size=128,
                itl_deadline_s=0.1,
                feasible_batch_sizes=(8, 16, 32, 64),
            )

    def test_negative_replicas_raises(self) -> None:
        """Negative num_replicas should raise ValueError."""
        with pytest.raises(ValueError, match="num_replicas must be >= 0"):
            InferenceModelSpec(
                model_label="M", num_replicas=-1, gpus_per_replica=1, initial_batch_size=128, itl_deadline_s=0.1
            )

    def test_zero_gpus_per_replica_raises(self) -> None:
        """Zero gpus_per_replica should raise ValueError."""
        with pytest.raises(ValueError, match="gpus_per_replica must be >= 1"):
            InferenceModelSpec(
                model_label="M", num_replicas=1, gpus_per_replica=0, initial_batch_size=128, itl_deadline_s=0.1
            )

    def test_zero_initial_batch_raises(self) -> None:
        """Zero initial_batch_size should raise ValueError."""
        with pytest.raises(ValueError, match="initial_batch_size must be > 0"):
            InferenceModelSpec(
                model_label="M", num_replicas=1, gpus_per_replica=1, initial_batch_size=0, itl_deadline_s=0.1
            )

    def test_zero_itl_deadline_raises(self) -> None:
        """Zero itl_deadline_s should raise ValueError."""
        with pytest.raises(ValueError, match="itl_deadline_s must be > 0"):
            InferenceModelSpec(
                model_label="M", num_replicas=1, gpus_per_replica=1, initial_batch_size=128, itl_deadline_s=0.0
            )