[Hardware] Replace torch.cuda.device_count/current_device/set_device API (#36145)

Signed-off-by: Kunshang Ji <jikunshang95@gmail.com>
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
2026-03-12 22:57:47 +08:00
parent 2e693f48e7
commit 53ec16a705
89 changed files with 254 additions and 219 deletions
--- a/tests/lora/test_fused_moe_lora_kernel.py
+++ b/tests/lora/test_fused_moe_lora_kernel.py
@@ -638,7 +638,7 @@ def use_fused_moe_lora_kernel_tensor_parallel(
    set_random_seed(seed)

    device = torch.device(f"cuda:{local_rank}")
-    torch.cuda.set_device(device)
+    torch.accelerator.set_device_index(device)
    torch.set_default_device(device)
    torch.set_default_dtype(dtype)

--- a/tests/lora/test_layers.py
+++ b/tests/lora/test_layers.py
@@ -61,7 +61,7 @@ pytestmark = pytest.mark.skipif(
 )

 DEVICES = (
-    [f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)]
+    [f"cuda:{i}" for i in range(1 if torch.accelerator.device_count() == 1 else 2)]
    if current_platform.is_cuda_alike()
    else ["cpu"]
 )
@@ -260,7 +260,7 @@ def test_embeddings(
    # device, see: https://github.com/triton-lang/triton/issues/2925
    # Same below.
    if current_platform.is_cuda_alike():
-        torch.cuda.set_device(device)
+        torch.accelerator.set_device_index(device)

    torch.set_default_device(device)
    max_loras = 8
@@ -359,7 +359,7 @@ def test_lm_head_logits_processor(
    default_vllm_config, dist_init, num_loras, device, vocab_size, stage
 ) -> None:
    if current_platform.is_cuda_alike():
-        torch.cuda.set_device(device)
+        torch.accelerator.set_device_index(device)

    torch.set_default_device(device)
    max_loras = 8
@@ -476,7 +476,7 @@ def test_lm_head_logits_processor_invalid_vocab_size(
 ) -> None:
    """Test that LogitsProcessorWithLoRA raises ValueError for invalid vocab sizes."""
    if current_platform.is_cuda_alike():
-        torch.cuda.set_device(device)
+        torch.accelerator.set_device_index(device)

    torch.set_default_device(device)
    max_loras = 8
@@ -505,7 +505,7 @@ def test_linear_replicated(
    stage,
 ) -> None:
    if current_platform.is_cuda_alike():
-        torch.cuda.set_device(device)
+        torch.accelerator.set_device_index(device)

    max_loras = 8
    torch.set_default_device(device)
@@ -612,7 +612,7 @@ def test_linear_parallel(
    default_vllm_config, dist_init, num_loras, orientation, fully_shard, device, stage
 ) -> None:
    if current_platform.is_cuda_alike():
-        torch.cuda.set_device(device)
+        torch.accelerator.set_device_index(device)

    max_loras = 8
    torch.set_default_device(device)
@@ -737,7 +737,7 @@ def test_column_parallel_packed(
    default_vllm_config, dist_init, num_loras, repeats, fully_shard, device, stage
 ) -> None:
    if current_platform.is_cuda_alike():
-        torch.cuda.set_device(device)
+        torch.accelerator.set_device_index(device)

    max_loras = 8
    torch.set_default_device(device)
@@ -885,7 +885,7 @@ def test_merged_column_parallel_variable_slice(
    default_vllm_config, dist_init, num_loras, num_slices, device, stage
 ) -> None:
    if current_platform.is_cuda_alike():
-        torch.cuda.set_device(device)
+        torch.accelerator.set_device_index(device)

    max_loras = 8
    torch.set_default_device(device)
--- a/tests/lora/test_lora_manager.py
+++ b/tests/lora/test_lora_manager.py
@@ -37,7 +37,7 @@ EMBEDDING_MODULES = {


 DEVICES = (
-    [f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)]
+    [f"cuda:{i}" for i in range(1 if torch.accelerator.device_count() == 1 else 2)]
    if current_platform.is_cuda_alike()
    else ["cpu"]
 )
--- a/tests/lora/test_mixtral.py
+++ b/tests/lora/test_mixtral.py
@@ -34,7 +34,7 @@ def do_sample(
 def test_mixtral_lora(mixtral_lora_files, tp_size):
    """Original test, the LoRA model has the common target modules, not all"""
    if (
-        torch.cuda.device_count() < tp_size
+        torch.accelerator.device_count() < tp_size
        and tp_size > 1
        and current_platform.is_cuda_alike()
    ):
--- a/tests/lora/test_punica_ops.py
+++ b/tests/lora/test_punica_ops.py
@@ -395,7 +395,7 @@ def test_kernels(
    Tests LoRA kernels.
    """
    torch.set_default_device(device)
-    torch.cuda.set_device(device)
+    torch.accelerator.set_device_index(device)
    set_random_seed(seed)

    if op_type == "shrink":
@@ -448,7 +448,7 @@ def test_kernels_hidden_size(
    Tests SGMV and LoRA kernels.
    """
    torch.set_default_device(device)
-    torch.cuda.set_device(device)
+    torch.accelerator.set_device_index(device)
    set_random_seed(seed)

    if op_type == "shrink":