[UX] Integrate DeepGEMM into vLLM wheel via CMake (#37980)
Signed-off-by: mgoin <mgoin64@gmail.com> Signed-off-by: Michael Goin <mgoin64@gmail.com> Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
151
cmake/external_projects/deepgemm.cmake
Normal file
151
cmake/external_projects/deepgemm.cmake
Normal file
@@ -0,0 +1,151 @@
|
||||
include(FetchContent)
|
||||
|
||||
# Optional local checkout: when DEEPGEMM_SRC_DIR is set, DeepGEMM is built
# from that directory and nothing is downloaded.
# The value can be supplied either as a CMake argument or through the
# environment; when the environment variable is defined it takes precedence.
if(DEFINED ENV{DEEPGEMM_SRC_DIR})
  set(DEEPGEMM_SRC_DIR $ENV{DEEPGEMM_SRC_DIR})
endif()
|
||||
|
||||
# Declare where the DeepGEMM sources come from: a user-provided local
# directory when DEEPGEMM_SRC_DIR is set, otherwise a pinned upstream commit.
# Both variants pass empty CONFIGURE_COMMAND / BUILD_COMMAND values.
if(DEEPGEMM_SRC_DIR)
  FetchContent_Declare(
    deepgemm
    # Quoted so the path is always passed as a single argument.
    SOURCE_DIR "${DEEPGEMM_SRC_DIR}"
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
  )
else()
  # This ref should be kept in sync with tools/install_deepgemm.sh
  FetchContent_Declare(
    deepgemm
    GIT_REPOSITORY https://github.com/deepseek-ai/DeepGEMM.git
    GIT_TAG 477618cd51baffca09c4b0b87e97c03fe827ef03
    # Only the submodules whose headers are referenced by this build.
    GIT_SUBMODULES "third-party/cutlass" "third-party/fmt"
    GIT_PROGRESS TRUE
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
  )
endif()
|
||||
|
||||
# Populate the declared content with FetchContent_Populate rather than
# FetchContent_MakeAvailable: DeepGEMM's own CMakeLists.txt has incompatible
# find_package calls and must not be processed by this build.
# NOTE(review): recent CMake releases appear to deprecate the single-argument
# FetchContent_Populate form (policy CMP0169) — confirm against the project's
# minimum CMake version.
FetchContent_GetProperties(deepgemm)
if(NOT deepgemm_POPULATED)
  FetchContent_Populate(deepgemm)
endif()

message(STATUS "DeepGEMM is available at ${deepgemm_SOURCE_DIR}")
|
||||
|
||||
# Architectures DeepGEMM can be enabled for, gated on the CUDA compiler version:
#   * 12.3+ -> SM90  ("9.0a")
#   * 12.8  -> SM100 ("10.0a")
#   * 12.9+ -> SM100 ("10.0f")
# The version is expanded inside quotes so that an empty or undefined
# CMAKE_CUDA_COMPILER_VERSION simply fails every comparison instead of
# producing a malformed if() expression.
set(DEEPGEMM_SUPPORT_ARCHS)
if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.3)
  list(APPEND DEEPGEMM_SUPPORT_ARCHS "9.0a")
endif()
if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.9)
  list(APPEND DEEPGEMM_SUPPORT_ARCHS "10.0f")
elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.8)
  list(APPEND DEEPGEMM_SUPPORT_ARCHS "10.0a")
endif()
|
||||
|
||||
# Narrow the architectures DeepGEMM supports down to the ones requested for
# this build (CUDA_ARCHS). An empty result disables the extension below.
cuda_archs_loose_intersection(DEEPGEMM_ARCHS
  "${DEEPGEMM_SUPPORT_ARCHS}" "${CUDA_ARCHS}")

if(DEEPGEMM_ARCHS)
  message(STATUS "DeepGEMM CUDA architectures: ${DEEPGEMM_ARCHS}")

  find_package(CUDAToolkit REQUIRED)

  #
  # Build the _C pybind11 extension from DeepGEMM's C++ source.
  # This is a CXX-only module — CUDA kernels are JIT-compiled at runtime.
  #
  Python_add_library(_deep_gemm_C MODULE WITH_SOABI
    "${deepgemm_SOURCE_DIR}/csrc/python_api.cpp")

  # The pybind11 module name must be _C to match DeepGEMM's Python imports.
  # The C++ standard is requested through target properties (strict C++17,
  # no GNU extensions) rather than a hand-written -std= flag.
  set_target_properties(_deep_gemm_C PROPERTIES
    OUTPUT_NAME "_C"
    CXX_STANDARD 17
    CXX_STANDARD_REQUIRED ON
    CXX_EXTENSIONS OFF)

  # No -D prefix: target_compile_definitions() takes bare NAME=value items.
  target_compile_definitions(_deep_gemm_C PRIVATE
    TORCH_EXTENSION_NAME=_C)

  target_include_directories(_deep_gemm_C PRIVATE
    "${deepgemm_SOURCE_DIR}/csrc"
    "${deepgemm_SOURCE_DIR}/deep_gemm/include"
    "${deepgemm_SOURCE_DIR}/third-party/cutlass/include"
    "${deepgemm_SOURCE_DIR}/third-party/cutlass/tools/util/include"
    "${deepgemm_SOURCE_DIR}/third-party/fmt/include")

  target_compile_options(_deep_gemm_C PRIVATE
    $<$<COMPILE_LANGUAGE:CXX>:-O3>
    $<$<COMPILE_LANGUAGE:CXX>:-Wno-psabi>
    $<$<COMPILE_LANGUAGE:CXX>:-Wno-deprecated-declarations>)

  # torch_python is required because DeepGEMM uses pybind11 type casters
  # for at::Tensor (via PYBIND11_MODULE), unlike vLLM's own extensions which
  # use torch::Library custom ops.
  find_library(TORCH_PYTHON_LIBRARY torch_python
    PATHS "${TORCH_INSTALL_PREFIX}/lib"
    REQUIRED)

  target_link_libraries(_deep_gemm_C PRIVATE
    torch ${TORCH_LIBRARIES} "${TORCH_PYTHON_LIBRARY}"
    CUDA::cudart CUDA::nvrtc)

  # Install the shared library into the vendored package directory
  install(TARGETS _deep_gemm_C
    LIBRARY DESTINATION vllm/third_party/deep_gemm
    COMPONENT _deep_gemm_C)

  #
  # Vendor DeepGEMM Python package files
  #
  install(FILES
    "${deepgemm_SOURCE_DIR}/deep_gemm/__init__.py"
    DESTINATION vllm/third_party/deep_gemm
    COMPONENT _deep_gemm_C)

  # Only *.py files are copied from each of the Python sub-packages.
  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/utils/"
    DESTINATION vllm/third_party/deep_gemm/utils
    COMPONENT _deep_gemm_C
    FILES_MATCHING PATTERN "*.py")

  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/testing/"
    DESTINATION vllm/third_party/deep_gemm/testing
    COMPONENT _deep_gemm_C
    FILES_MATCHING PATTERN "*.py")

  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/legacy/"
    DESTINATION vllm/third_party/deep_gemm/legacy
    COMPONENT _deep_gemm_C
    FILES_MATCHING PATTERN "*.py")

  # Generate envs.py (normally generated by DeepGEMM's setup.py build step).
  # It is written to the binary dir and renamed to envs.py at install time.
  file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/deep_gemm_envs.py"
    "# Pre-installed environment variables\npersistent_envs = dict()\n")
  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/deep_gemm_envs.py"
    DESTINATION vllm/third_party/deep_gemm
    RENAME envs.py
    COMPONENT _deep_gemm_C)

  #
  # Install include files needed for JIT compilation at runtime.
  # The JIT compiler finds these relative to the package directory.
  #

  # DeepGEMM's own CUDA headers
  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/include/"
    DESTINATION vllm/third_party/deep_gemm/include
    COMPONENT _deep_gemm_C)

  # CUTLASS and CuTe headers (vendored for JIT, separate from vLLM's CUTLASS).
  # They are merged into the same include directory as DeepGEMM's headers.
  install(DIRECTORY "${deepgemm_SOURCE_DIR}/third-party/cutlass/include/"
    DESTINATION vllm/third_party/deep_gemm/include
    COMPONENT _deep_gemm_C)

else()
  message(STATUS "DeepGEMM will not compile: "
    "unsupported CUDA architecture ${CUDA_ARCHS}")
  # Create empty target so setup.py doesn't fail on unsupported systems
  add_custom_target(_deep_gemm_C)
endif()
|
||||
Reference in New Issue
Block a user