refactor hard coded device string in test files under tests/v1 and tests/lora (#37566)
Signed-off-by: Liao, Wei <wei.liao@intel.com>
This commit is contained in:
@@ -18,6 +18,8 @@ from vllm.v1.attention.backend import CommonAttentionMetadata
 from vllm.v1.attention.backends.fa_utils import is_flash_attn_varlen_func_available
 from vllm.v1.attention.backends.registry import AttentionBackendEnum
 
+DEVICE_TYPE = current_platform.device_type
+
 if not is_flash_attn_varlen_func_available():
     pytest.skip(
         "This test requires flash_attn_varlen_func, but it's not available.",
@@ -170,9 +172,9 @@ def _get_available_reference_backends() -> list[AttentionBackendEnum]:
 
 
 class MockAttentionLayer(torch.nn.Module):
-    _q_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")
-    _k_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")
-    _v_scale = torch.tensor(1.0, dtype=torch.float32, device="cuda")
+    _q_scale = torch.tensor(1.0, dtype=torch.float32, device=DEVICE_TYPE)
+    _k_scale = torch.tensor(1.0, dtype=torch.float32, device=DEVICE_TYPE)
+    _v_scale = torch.tensor(1.0, dtype=torch.float32, device=DEVICE_TYPE)
     layer_name = "mock_layer"
 
     def __init__(self):
||||
Reference in New Issue
Block a user