replace cuda_device_count_stateless() to current_platform.device_count() (#37841)

Signed-off-by: Liao, Wei <wei.liao@intel.com>
Signed-off-by: wliao2 <wei.liao@intel.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
wliao2
2026-03-31 07:32:54 -07:00
committed by GitHub
parent e8057c00bc
commit 4dfad17ed1
20 changed files with 96 additions and 92 deletions

View File

@@ -15,7 +15,7 @@ from tests.v1.shutdown.utils import (
from vllm import LLM, AsyncEngineArgs, SamplingParams
from vllm.distributed import get_tensor_model_parallel_rank
from vllm.model_executor.models.llama import LlamaForCausalLM
from vllm.utils.torch_utils import cuda_device_count_stateless
from vllm.platforms import current_platform
from vllm.v1.engine.async_llm import AsyncLLM
from vllm.v1.engine.exceptions import EngineDeadError
@@ -60,7 +60,7 @@ async def test_async_llm_model_error(
AsyncLLM always uses an MP client.
"""
if cuda_device_count_stateless() < tensor_parallel_size:
if current_platform.device_count() < tensor_parallel_size:
pytest.skip(reason="Not enough CUDA devices")
# Monkeypatch an error in the model.
@@ -126,7 +126,7 @@ def test_llm_model_error(
TODO(andy) - LLM without multiprocessing; LLM with multiprocessing
and >1 rank
"""
if cuda_device_count_stateless() < tensor_parallel_size:
if current_platform.device_count() < tensor_parallel_size:
pytest.skip(reason="Not enough CUDA devices")
with monkeypatch.context() as m: