[ROCm] Fix MoE kernel test failures on gfx950 (#37833)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
Signed-off-by: Matthew Wong <Matthew.Wong2@amd.com>
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Co-authored-by: Matthew Wong <Matthew.Wong2@amd.com>
This commit is contained in:
Andreas Karatzas
2026-03-25 13:46:40 -05:00
committed by GitHub
parent e38817fadb
commit 7d6917bef5
12 changed files with 478 additions and 86 deletions

View File

@@ -16,7 +16,7 @@ from vllm.platforms import current_platform
"platform_method,expected_backend",
[
("is_cuda", UnquantizedMoeBackend.TRITON), # Default CUDA without FlashInfer
("is_rocm", UnquantizedMoeBackend.TRITON),
("is_rocm", UnquantizedMoeBackend.TRITON), # ROCm without AITER
("is_cpu", UnquantizedMoeBackend.CPU),
("is_xpu", UnquantizedMoeBackend.XPU),
("is_tpu", UnquantizedMoeBackend.TPU),
@@ -27,13 +27,19 @@ from vllm.platforms import current_platform
"vllm.model_executor.layers.fused_moe.oracle.unquantized.has_flashinfer",
return_value=False,
)
@patch(
"vllm.model_executor.layers.fused_moe.oracle.unquantized.rocm_aiter_ops.is_fused_moe_enabled",
return_value=False,
)
def test_select_default_backend_by_platform(
mock_aiter_enabled,
mock_has_flashinfer,
monkeypatch,
platform_method,
expected_backend,
):
"""Test backend selection for different platforms."""
"""Test default backend selection per platform with all optional
accelerators (FlashInfer, AITER) disabled."""
with patch(
"vllm.model_executor.layers.fused_moe.oracle.unquantized.current_platform"
) as mock_platform:
@@ -58,6 +64,39 @@ def test_select_default_backend_by_platform(
assert selected_backend == expected_backend
@patch(
"vllm.model_executor.layers.fused_moe.oracle.unquantized.has_flashinfer",
return_value=False,
)
@patch(
"vllm.model_executor.layers.fused_moe.oracle.unquantized.rocm_aiter_ops.is_fused_moe_enabled",
return_value=True,
)
@pytest.mark.skipif(
not current_platform.is_rocm(), reason="ROCm-specific backend selection test"
)
def test_select_rocm_aiter_backend(mock_aiter_enabled, mock_has_flashinfer):
"""Test ROCm backend selection when AITER is available."""
with patch(
"vllm.model_executor.layers.fused_moe.oracle.unquantized.current_platform"
) as mock_platform:
mock_platform.is_cuda.return_value = False
mock_platform.is_rocm.return_value = True
mock_platform.is_cpu.return_value = False
mock_platform.is_xpu.return_value = False
mock_platform.is_tpu.return_value = False
mock_platform.is_out_of_tree.return_value = False
moe_config = make_dummy_moe_config()
selected_backend = select_unquantized_moe_backend(
moe_config=moe_config,
use_ep=False,
use_dp=False,
)
assert selected_backend == UnquantizedMoeBackend.AITER
@patch(
"vllm.model_executor.layers.fused_moe.oracle.unquantized.has_flashinfer",
return_value=True,