[FEAT] [ROCm]: Add AITER CK 2 Stages MoE support (#17110)

Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com>
Co-authored-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
Author: TJian
Date: 2025-05-14 18:03:11 +08:00
Committed by: GitHub
Parent: 38fe728d60
Commit: 612c2edb4f
7 changed files with 201 additions and 112 deletions


@@ -224,9 +224,16 @@ def test_mixtral_moe(dtype: torch.dtype, padding: bool, use_rocm_aiter: bool,
"""Make sure our Mixtral MoE implementation agrees with the one from
huggingface."""
# clear the cache before every test
from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import (
is_rocm_aiter_moe_enabled)
is_rocm_aiter_moe_enabled.cache_clear()
if use_rocm_aiter:
monkeypatch.setenv("VLLM_ROCM_USE_AITER", "1")
if dtype == torch.float32:
pytest.skip("AITER ROCm test skip for float32")
# Instantiate our and huggingface's MoE blocks
config = MixtralConfig()
hf_moe = MixtralSparseMoeBlock(config).to(dtype).to("cuda")