[Feature] Migrate DeepGEMM API from get_m_alignment_for_contiguous_layout to get_mk_alignment_for_contiguous_layout (#26935)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2025-10-16 16:46:48 -04:00
parent fb0571b077
commit b3dda72c23
8 changed files with 57 additions and 46 deletions
--- a/tests/kernels/moe/test_block_fp8.py
+++ b/tests/kernels/moe/test_block_fp8.py
@@ -22,13 +22,13 @@ from vllm.model_executor.layers.fused_moe.fused_moe import (
 )
 from vllm.platforms import current_platform
 from vllm.utils import has_deep_gemm
-from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used
+from vllm.utils.deep_gemm import (
+    get_mk_alignment_for_contiguous_layout,
+    is_deep_gemm_e8m0_used,
+)

 dg_available = has_deep_gemm()

-if dg_available:
-    from deep_gemm import get_m_alignment_for_contiguous_layout
-
 if current_platform.get_device_capability() < (9, 0):
    pytest.skip("FP8 Triton requires CUDA 9.0 or higher", allow_module_level=True)

@@ -218,8 +218,7 @@ def test_w8a8_block_fp8_deep_gemm_fused_moe(M, N, K, E, topk, seed, monkeypatch)
    torch.manual_seed(seed)

    monkeypatch.setenv("VLLM_FUSED_MOE_CHUNK_SIZE", str(chunk_size))
-    block_m = get_m_alignment_for_contiguous_layout()
-    block_size = [block_m, block_m]
+    block_size = get_mk_alignment_for_contiguous_layout()
    dtype = torch.bfloat16

    a = torch.randn((M, K), dtype=dtype) / 10