[UX] Integrate DeepGEMM into vLLM wheel via CMake (#37980)

Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Michael Goin
2026-04-09 03:56:32 +02:00
committed by GitHub
parent 83aea2147f
commit eb4205fee5
12 changed files with 251 additions and 40 deletions

View File

@@ -379,6 +379,20 @@ class cmake_build_ext(build_ext):
dirs_exist_ok=True,
)
if _is_cuda():
    # Editable installs import straight from the source tree, so the
    # vendored deep_gemm package produced under build_lib has to be
    # mirrored back into vllm/third_party/deep_gemm.
    deep_gemm_build = os.path.join(
        self.build_lib,
        "vllm",
        "third_party",
        "deep_gemm",
    )
    deep_gemm_dest = "vllm/third_party/deep_gemm"
    # Nothing to mirror when the build did not produce the package.
    if os.path.exists(deep_gemm_build):
        print(f"Copying {deep_gemm_build} to vllm/third_party/deep_gemm")
        # dirs_exist_ok=True lets repeated builds overwrite an earlier copy.
        shutil.copytree(deep_gemm_build, deep_gemm_dest, dirs_exist_ok=True)
class precompiled_build_ext(build_ext):
"""Disables extension building when using precompiled binaries."""
@@ -685,6 +699,8 @@ class precompiled_wheel_utils:
# FlashMLA: Python sources under vllm/third_party/flashmla whose path
# components do not begin with a dot.
flashmla_regex = re.compile(
    r"vllm/third_party/flashmla/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py"
)

# DeepGEMM: take everything below the vendored package directory
# (.py, .so, .cuh, .h, .hpp, etc.), not only Python sources.
deep_gemm_regex = re.compile(r"vllm/third_party/deep_gemm/.*")

# Start with the wheel members that are explicitly listed in files_to_copy.
file_members = [
    member for member in wheel.filelist if member.filename in files_to_copy
]
@@ -699,6 +715,9 @@ class precompiled_wheel_utils:
# Add every wheel member whose path matches the FlashMLA pattern.
file_members += [
    member
    for member in wheel.filelist
    if flashmla_regex.match(member.filename)
]
# Add every wheel member that lives under the vendored DeepGEMM package.
file_members += [
    member
    for member in wheel.filelist
    if deep_gemm_regex.match(member.filename)
]
for file in file_members:
print(f"[extract] {file.filename}")
@@ -987,6 +1006,12 @@ if _is_cuda():
ext_modules.append(
CMakeExtension(name="vllm._flashmla_extension_C", optional=True)
)
# DeepGEMM requires CUDA 12.3+ (SM90/SM100). The precompiled path is
# checked first, so nvcc is only queried when building from source with
# CUDA_HOME set.
_deep_gemm_enabled = envs.VLLM_USE_PRECOMPILED or (
    CUDA_HOME and get_nvcc_cuda_version() >= Version("12.3")
)
if _deep_gemm_enabled:
    # Optional since it won't build on unsupported architectures.
    ext_modules.append(
        CMakeExtension(name="vllm._deep_gemm_C", optional=True)
    )
if _is_cpu():
import platform
@@ -1014,6 +1039,10 @@ package_data = {
"entrypoints/serve/instrumentator/static/*.js",
"entrypoints/serve/instrumentator/static/*.css",
"distributed/kv_transfer/kv_connector/v1/hf3fs/utils/*.cpp",
# DeepGEMM JIT include headers (vendored via cmake)
"third_party/deep_gemm/include/**/*.cuh",
"third_party/deep_gemm/include/**/*.h",
"third_party/deep_gemm/include/**/*.hpp",
]
}