[Hardware] Replace torch.cuda.device_count/current_device/set_device API (#36145)

Signed-off-by: Kunshang Ji <jikunshang95@gmail.com>
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
Kunshang Ji
2026-03-12 22:57:47 +08:00
committed by GitHub
parent 2e693f48e7
commit 53ec16a705
89 changed files with 254 additions and 219 deletions

View File

@@ -638,7 +638,7 @@ def use_fused_moe_lora_kernel_tensor_parallel(
set_random_seed(seed)
device = torch.device(f"cuda:{local_rank}")
torch.cuda.set_device(device)
torch.accelerator.set_device_index(device)
torch.set_default_device(device)
torch.set_default_dtype(dtype)

View File

@@ -61,7 +61,7 @@ pytestmark = pytest.mark.skipif(
)
DEVICES = (
[f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)]
[f"cuda:{i}" for i in range(1 if torch.accelerator.device_count() == 1 else 2)]
if current_platform.is_cuda_alike()
else ["cpu"]
)
@@ -260,7 +260,7 @@ def test_embeddings(
# device, see: https://github.com/triton-lang/triton/issues/2925
# Same below.
if current_platform.is_cuda_alike():
torch.cuda.set_device(device)
torch.accelerator.set_device_index(device)
torch.set_default_device(device)
max_loras = 8
@@ -359,7 +359,7 @@ def test_lm_head_logits_processor(
default_vllm_config, dist_init, num_loras, device, vocab_size, stage
) -> None:
if current_platform.is_cuda_alike():
torch.cuda.set_device(device)
torch.accelerator.set_device_index(device)
torch.set_default_device(device)
max_loras = 8
@@ -476,7 +476,7 @@ def test_lm_head_logits_processor_invalid_vocab_size(
) -> None:
"""Test that LogitsProcessorWithLoRA raises ValueError for invalid vocab sizes."""
if current_platform.is_cuda_alike():
torch.cuda.set_device(device)
torch.accelerator.set_device_index(device)
torch.set_default_device(device)
max_loras = 8
@@ -505,7 +505,7 @@ def test_linear_replicated(
stage,
) -> None:
if current_platform.is_cuda_alike():
torch.cuda.set_device(device)
torch.accelerator.set_device_index(device)
max_loras = 8
torch.set_default_device(device)
@@ -612,7 +612,7 @@ def test_linear_parallel(
default_vllm_config, dist_init, num_loras, orientation, fully_shard, device, stage
) -> None:
if current_platform.is_cuda_alike():
torch.cuda.set_device(device)
torch.accelerator.set_device_index(device)
max_loras = 8
torch.set_default_device(device)
@@ -737,7 +737,7 @@ def test_column_parallel_packed(
default_vllm_config, dist_init, num_loras, repeats, fully_shard, device, stage
) -> None:
if current_platform.is_cuda_alike():
torch.cuda.set_device(device)
torch.accelerator.set_device_index(device)
max_loras = 8
torch.set_default_device(device)
@@ -885,7 +885,7 @@ def test_merged_column_parallel_variable_slice(
default_vllm_config, dist_init, num_loras, num_slices, device, stage
) -> None:
if current_platform.is_cuda_alike():
torch.cuda.set_device(device)
torch.accelerator.set_device_index(device)
max_loras = 8
torch.set_default_device(device)

View File

@@ -37,7 +37,7 @@ EMBEDDING_MODULES = {
DEVICES = (
[f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)]
[f"cuda:{i}" for i in range(1 if torch.accelerator.device_count() == 1 else 2)]
if current_platform.is_cuda_alike()
else ["cpu"]
)

View File

@@ -34,7 +34,7 @@ def do_sample(
def test_mixtral_lora(mixtral_lora_files, tp_size):
"""Original test, the LoRA model has the common target modules, not all"""
if (
torch.cuda.device_count() < tp_size
torch.accelerator.device_count() < tp_size
and tp_size > 1
and current_platform.is_cuda_alike()
):

View File

@@ -395,7 +395,7 @@ def test_kernels(
Tests LoRA kernels.
"""
torch.set_default_device(device)
torch.cuda.set_device(device)
torch.accelerator.set_device_index(device)
set_random_seed(seed)
if op_type == "shrink":
@@ -448,7 +448,7 @@ def test_kernels_hidden_size(
Tests SGMV and LoRA kernels.
"""
torch.set_default_device(device)
torch.cuda.set_device(device)
torch.accelerator.set_device_index(device)
set_random_seed(seed)
if op_type == "shrink":