[UX] Integrate DeepGEMM into vLLM wheel via CMake (#37980)

Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Michael Goin
2026-04-09 03:56:32 +02:00
committed by GitHub
parent 83aea2147f
commit eb4205fee5
12 changed files with 251 additions and 40 deletions

View File

@@ -14,7 +14,11 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
get_fp8_min_max,
)
from vllm.platforms import current_platform
-from vllm.utils.deep_gemm import DeepGemmQuantScaleFMT, has_deep_gemm
+from vllm.utils.deep_gemm import (
+    DeepGemmQuantScaleFMT,
+    has_deep_gemm,
+    transform_sf_into_required_layout,
+)
from vllm.utils.math_utils import cdiv, round_up
from vllm.utils.torch_utils import set_random_seed
@@ -256,8 +260,6 @@ def test_silu_mul_fp8_quant_deep_gemm(E: int, T: int, H: int, fp8_type: torch.dt
and current_platform.has_device_capability(100)
and scale_fmt == DeepGemmQuantScaleFMT.UE8M0
):
-from deep_gemm import transform_sf_into_required_layout
_q, _s = ref_with_scale_fmt(
E,
T,