[UT][Intel GPU] Use current_platform instead of hardcoded device in v1 tests (#20169)

Signed-off-by: Ma, Liangliang <liangliang.ma@intel.com>
2025-07-02 09:06:04 +08:00
parent 3be8d312a2
commit a0389e0554
10 changed files with 44 additions and 26 deletions
--- a/tests/v1/spec_decode/test_eagle.py
+++ b/tests/v1/spec_decode/test_eagle.py
@@ -10,6 +10,7 @@ from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, ModelConfig,
                         ParallelConfig, SchedulerConfig, SpeculativeConfig,
                         VllmConfig)
 from vllm.model_executor.models.llama import LlamaForCausalLM
+from vllm.platforms import current_platform
 from vllm.v1.spec_decode.eagle import EagleProposer

 model_dir = "meta-llama/Llama-3.1-8B-Instruct"
@@ -38,15 +39,17 @@ def _create_proposer(method: str, k: int) -> EagleProposer:
        num_speculative_tokens=k,
    )

-    vllm_config = VllmConfig(model_config=model_config,
-                             cache_config=CacheConfig(),
-                             speculative_config=speculative_config,
-                             device_config=DeviceConfig(device="cuda"),
-                             parallel_config=ParallelConfig(),
-                             load_config=LoadConfig(),
-                             scheduler_config=SchedulerConfig())
+    vllm_config = VllmConfig(
+        model_config=model_config,
+        cache_config=CacheConfig(),
+        speculative_config=speculative_config,
+        device_config=DeviceConfig(device=current_platform.device_type),
+        parallel_config=ParallelConfig(),
+        load_config=LoadConfig(),
+        scheduler_config=SchedulerConfig())

-    return EagleProposer(vllm_config=vllm_config, device='cuda')
+    return EagleProposer(vllm_config=vllm_config,
+                         device=current_platform.device_type)


 def test_prepare_inputs():
@@ -59,7 +62,7 @@ def test_prepare_inputs():
                    a, a + 1, ..., a + b - n2 - 1,
                    a + b, a + b + 1, ..., a + b + c - n3 - 1]
    """
-    device = torch.device('cuda')
+    device = torch.device(current_platform.device_type)

    # a = 4, b = 7, c = 5
    # n1 = 1, n2 = 3, n3 = 2
@@ -198,7 +201,7 @@ def test_load_model(mock_get_model, mock_get_layers, mock_get_pp_group, method,
@pytest.mark.parametrize("num_speculative_tokens", [1, 3, 8])
 def test_propose(num_speculative_tokens):
    # Use GPU device
-    device = torch.device('cuda')
+    device = torch.device(current_platform.device_type)

    # Setup test parameters
    batch_size = 2