from inference.device_utils import is_cuda_oom, iter_inference_device_plans


def test_is_cuda_oom_matches_pytorch_message() -> None:
    """``is_cuda_oom`` accepts a RuntimeError carrying a CUDA OOM message.

    The message text is modelled on the out-of-memory error the test name
    attributes to PyTorch.
    """
    oom_message = (
        "CUDA out of memory. Tried to allocate 384.00 MiB. "
        "GPU 0 has a total capacity of 3.68 GiB of which 350.19 MiB is free."
    )
    oom_error = RuntimeError(oom_message)

    assert is_cuda_oom(oom_error)


def test_is_cuda_oom_rejects_other_errors() -> None:
    """``is_cuda_oom`` is falsy for a RuntimeError unrelated to CUDA memory."""
    unrelated_error = RuntimeError("disk full")

    assert not is_cuda_oom(unrelated_error)


def test_iter_inference_device_plans_includes_cpu(monkeypatch) -> None:
    """With ``INFERENCE_DEVICE=cpu`` exactly one plan, targeting "cpu", is yielded."""
    # monkeypatch restores the environment variable once the test finishes.
    monkeypatch.setenv("INFERENCE_DEVICE", "cpu")

    device_plans = list(iter_inference_device_plans())

    assert len(device_plans) == 1
    sole_plan = device_plans[0]
    assert sole_plan.device == "cpu"