[Feature] Migrate DeepGEMM API from get_m_alignment_for_contiguous_layout to get_mk_alignment_for_contiguous_layout (#26935)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Wentao Ye
2025-10-16 16:46:48 -04:00
committed by GitHub
parent fb0571b077
commit b3dda72c23
8 changed files with 57 additions and 46 deletions

View File

@@ -22,13 +22,13 @@ from vllm.model_executor.layers.fused_moe.fused_moe import (
)
from vllm.platforms import current_platform
from vllm.utils import has_deep_gemm
from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used
from vllm.utils.deep_gemm import (
get_mk_alignment_for_contiguous_layout,
is_deep_gemm_e8m0_used,
)
dg_available = has_deep_gemm()
if dg_available:
from deep_gemm import get_m_alignment_for_contiguous_layout
if current_platform.get_device_capability() < (9, 0):
pytest.skip("FP8 Triton requires CUDA 9.0 or higher", allow_module_level=True)
@@ -218,8 +218,7 @@ def test_w8a8_block_fp8_deep_gemm_fused_moe(M, N, K, E, topk, seed, monkeypatch)
torch.manual_seed(seed)
monkeypatch.setenv("VLLM_FUSED_MOE_CHUNK_SIZE", str(chunk_size))
block_m = get_m_alignment_for_contiguous_layout()
block_size = [block_m, block_m]
block_size = get_mk_alignment_for_contiguous_layout()
dtype = torch.bfloat16
a = torch.randn((M, K), dtype=dtype) / 10