[UX] Integrate DeepGEMM into vLLM wheel via CMake (#37980)

Signed-off-by: mgoin <mgoin64@gmail.com> Signed-off-by: Michael Goin <mgoin64@gmail.com> Co-authored-by: Claude <noreply@anthropic.com>
2026-04-09 03:56:32 +02:00
parent 83aea2147f
commit eb4205fee5
12 changed files with 251 additions and 40 deletions
--- a/cmake/external_projects/deepgemm.cmake
+++ b/cmake/external_projects/deepgemm.cmake
@@ -0,0 +1,151 @@
+include(FetchContent)
+
+# Optional local checkout: when DEEPGEMM_SRC_DIR is set, DeepGEMM is built
+# from that directory rather than downloaded.
+# The path may come from the environment or from a cmake argument;
+# when both are given, the environment variable wins.
+if(DEFINED ENV{DEEPGEMM_SRC_DIR})
+  set(DEEPGEMM_SRC_DIR $ENV{DEEPGEMM_SRC_DIR})
+endif()
+
+if(NOT DEEPGEMM_SRC_DIR)
+  # Pinned upstream commit; keep this ref in sync with tools/install_deepgemm.sh
+  FetchContent_Declare(
+    deepgemm
+    GIT_REPOSITORY https://github.com/deepseek-ai/DeepGEMM.git
+    GIT_TAG 477618cd51baffca09c4b0b87e97c03fe827ef03
+    GIT_SUBMODULES "third-party/cutlass" "third-party/fmt"
+    GIT_PROGRESS TRUE
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND ""
+  )
+else()
+  FetchContent_Declare(
+    deepgemm
+    SOURCE_DIR ${DEEPGEMM_SRC_DIR}
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND ""
+  )
+endif()
+
+# Populate only (FetchContent_Populate, not FetchContent_MakeAvailable): DeepGEMM's
+# own CMakeLists.txt has incompatible find_package calls, so it must not be processed.
+FetchContent_GetProperties(deepgemm)  # loads deepgemm_POPULATED / deepgemm_SOURCE_DIR
+if(NOT deepgemm_POPULATED)
+  FetchContent_Populate(deepgemm)  # NOTE(review): this form is deprecated under CMP0169 (CMake >= 3.30) — confirm
+endif()
+message(STATUS "DeepGEMM is available at ${deepgemm_SOURCE_DIR}")
+
+# DeepGEMM archs by CUDA compiler version: 12.3+ -> 9.0a (SM90); 12.8 -> 10.0a, 12.9+ -> 10.0f (SM100).
+set(DEEPGEMM_SUPPORT_ARCHS)
+if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.3)  # quoted: an empty version compares false instead of erroring
+  list(APPEND DEEPGEMM_SUPPORT_ARCHS "9.0a")
+endif()
+if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.9)
+  list(APPEND DEEPGEMM_SUPPORT_ARCHS "10.0f")
+elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.8)
+  list(APPEND DEEPGEMM_SUPPORT_ARCHS "10.0a")
+endif()
+
+# Presumably keeps only the supported archs also requested in CUDA_ARCHS (helper is defined elsewhere).
+cuda_archs_loose_intersection(DEEPGEMM_ARCHS "${DEEPGEMM_SUPPORT_ARCHS}" "${CUDA_ARCHS}")
+
+if(DEEPGEMM_ARCHS)
+  message(STATUS "DeepGEMM CUDA architectures: ${DEEPGEMM_ARCHS}")
+
+  find_package(CUDAToolkit REQUIRED)
+
+  # Supported target: build DeepGEMM's host-side extension and vendor its files.
+  # Build the _C pybind11 extension from DeepGEMM's C++ source.
+  # This is a CXX-only module — CUDA kernels are JIT-compiled at runtime.
+  #
+  Python_add_library(_deep_gemm_C MODULE WITH_SOABI
+    "${deepgemm_SOURCE_DIR}/csrc/python_api.cpp")
+
+  # The pybind11 module name must be _C to match DeepGEMM's Python imports.
+  set_target_properties(_deep_gemm_C PROPERTIES OUTPUT_NAME "_C")
+
+  # Definitions are given without -D; CMake adds the compiler's flag itself.
+  target_compile_definitions(_deep_gemm_C PRIVATE TORCH_EXTENSION_NAME=_C)
+
+  target_include_directories(_deep_gemm_C PRIVATE
+    "${deepgemm_SOURCE_DIR}/csrc"
+    "${deepgemm_SOURCE_DIR}/deep_gemm/include"
+    "${deepgemm_SOURCE_DIR}/third-party/cutlass/include"
+    "${deepgemm_SOURCE_DIR}/third-party/cutlass/tools/util/include"
+    "${deepgemm_SOURCE_DIR}/third-party/fmt/include")
+
+  target_compile_options(_deep_gemm_C PRIVATE
+    $<$<COMPILE_LANGUAGE:CXX>:-std=c++17>
+    $<$<COMPILE_LANGUAGE:CXX>:-O3>
+    $<$<COMPILE_LANGUAGE:CXX>:-Wno-psabi>
+    $<$<COMPILE_LANGUAGE:CXX>:-Wno-deprecated-declarations>)
+
+  # torch_python is needed because DeepGEMM uses pybind11 type casters for
+  # at::Tensor (via PYBIND11_MODULE); vLLM's own extensions use torch::Library
+  # custom ops. HINTS (not PATHS) makes Torch's lib dir win over system paths.
+  find_library(TORCH_PYTHON_LIBRARY torch_python
+    HINTS "${TORCH_INSTALL_PREFIX}/lib"
+    REQUIRED)
+
+  target_link_libraries(_deep_gemm_C PRIVATE
+    torch ${TORCH_LIBRARIES} "${TORCH_PYTHON_LIBRARY}"
+    CUDA::cudart CUDA::nvrtc)
+
+  # Install the shared library into the vendored package directory
+  install(TARGETS _deep_gemm_C
+    LIBRARY DESTINATION vllm/third_party/deep_gemm
+    COMPONENT _deep_gemm_C)
+
+  #
+  # Vendor DeepGEMM's Python package files (*.py only) next to the extension.
+  #
+  install(FILES
+    "${deepgemm_SOURCE_DIR}/deep_gemm/__init__.py"
+    DESTINATION vllm/third_party/deep_gemm
+    COMPONENT _deep_gemm_C)
+
+  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/utils/"
+    DESTINATION vllm/third_party/deep_gemm/utils
+    COMPONENT _deep_gemm_C
+    FILES_MATCHING PATTERN "*.py")
+
+  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/testing/"
+    DESTINATION vllm/third_party/deep_gemm/testing
+    COMPONENT _deep_gemm_C
+    FILES_MATCHING PATTERN "*.py")
+
+  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/legacy/"
+    DESTINATION vllm/third_party/deep_gemm/legacy
+    COMPONENT _deep_gemm_C
+    FILES_MATCHING PATTERN "*.py")
+
+  # Generate envs.py (normally generated by DeepGEMM's setup.py build step)
+  file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/deep_gemm_envs.py"
+    "# Pre-installed environment variables\npersistent_envs = dict()\n")
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/deep_gemm_envs.py"
+    DESTINATION vllm/third_party/deep_gemm
+    RENAME envs.py
+    COMPONENT _deep_gemm_C)
+
+  #
+  # Install include files needed for JIT compilation at runtime.
+  # The JIT compiler finds these relative to the package directory.
+  #
+
+  # DeepGEMM's own CUDA headers
+  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/include/"
+    DESTINATION vllm/third_party/deep_gemm/include
+    COMPONENT _deep_gemm_C)
+
+  # CUTLASS and CuTe headers (vendored for JIT, separate from vLLM's CUTLASS)
+  install(DIRECTORY "${deepgemm_SOURCE_DIR}/third-party/cutlass/include/"
+    DESTINATION vllm/third_party/deep_gemm/include
+    COMPONENT _deep_gemm_C)
+
+else()
+  message(STATUS "DeepGEMM will not compile: "
+    "unsupported CUDA architecture ${CUDA_ARCHS}")
+  # Create empty target so setup.py doesn't fail on unsupported systems
+  add_custom_target(_deep_gemm_C)
+endif()