include(FetchContent)

# Fetches DeepGEMM (or uses a local checkout), builds its `_C` pybind11
# extension as the `_deep_gemm_C` target, and installs the extension together
# with the DeepGEMM Python sources and JIT headers under
# vllm/third_party/deep_gemm.
#
# Inputs read from the including scope:
#   DEEPGEMM_SRC_DIR    - optional local DeepGEMM checkout (env var or -D)
#   CUDA_ARCHS          - architectures requested for this build
#   TORCH_LIBRARIES, TORCH_INSTALL_PREFIX - from the Torch package
# Also expects `cuda_archs_loose_intersection` and `Python_add_library` to be
# available.

# If DEEPGEMM_SRC_DIR is set, DeepGEMM is built from that directory
# instead of downloading.
# It can be set as an environment variable or passed as a cmake argument.
# The environment variable takes precedence.
if(DEFINED ENV{DEEPGEMM_SRC_DIR})
  set(DEEPGEMM_SRC_DIR $ENV{DEEPGEMM_SRC_DIR})
endif()

if(DEEPGEMM_SRC_DIR)
  FetchContent_Declare(
    deepgemm
    SOURCE_DIR "${DEEPGEMM_SRC_DIR}"
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
  )
else()
  # This ref should be kept in sync with tools/install_deepgemm.sh
  FetchContent_Declare(
    deepgemm
    GIT_REPOSITORY https://github.com/deepseek-ai/DeepGEMM.git
    GIT_TAG 477618cd51baffca09c4b0b87e97c03fe827ef03
    GIT_SUBMODULES "third-party/cutlass" "third-party/fmt"
    GIT_PROGRESS TRUE
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
  )
endif()

# Use FetchContent_Populate (not MakeAvailable) to avoid processing
# DeepGEMM's own CMakeLists.txt which has incompatible find_package calls.
FetchContent_GetProperties(deepgemm)
if(NOT deepgemm_POPULATED)
  FetchContent_Populate(deepgemm)
endif()
message(STATUS "DeepGEMM is available at ${deepgemm_SOURCE_DIR}")

# Architectures enabled per CUDA compiler version:
#   9.0a  with CUDA 12.3+
#   10.0f with CUDA 12.9+, otherwise 10.0a with CUDA 12.8
# The variable name is passed to if() directly (auto-dereferenced) so that an
# empty or undefined compiler version evaluates to false instead of producing
# a malformed if() expression.
set(DEEPGEMM_SUPPORT_ARCHS)
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3)
  list(APPEND DEEPGEMM_SUPPORT_ARCHS "9.0a")
endif()
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.9)
  list(APPEND DEEPGEMM_SUPPORT_ARCHS "10.0f")
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8)
  list(APPEND DEEPGEMM_SUPPORT_ARCHS "10.0a")
endif()

cuda_archs_loose_intersection(DEEPGEMM_ARCHS
  "${DEEPGEMM_SUPPORT_ARCHS}" "${CUDA_ARCHS}")

if(DEEPGEMM_ARCHS)
  message(STATUS "DeepGEMM CUDA architectures: ${DEEPGEMM_ARCHS}")

  find_package(CUDAToolkit REQUIRED)

  #
  # Build the _C pybind11 extension from DeepGEMM's C++ source.
  # This is a CXX-only module — CUDA kernels are JIT-compiled at runtime.
  #
  Python_add_library(_deep_gemm_C MODULE WITH_SOABI
    "${deepgemm_SOURCE_DIR}/csrc/python_api.cpp")

  # The pybind11 module name must be _C to match DeepGEMM's Python imports.
  set_target_properties(_deep_gemm_C PROPERTIES
    OUTPUT_NAME "_C")

  # No "-D" prefix: target_compile_definitions takes bare NAME=value items.
  target_compile_definitions(_deep_gemm_C PRIVATE
    "TORCH_EXTENSION_NAME=_C")

  target_include_directories(_deep_gemm_C PRIVATE
    "${deepgemm_SOURCE_DIR}/csrc"
    "${deepgemm_SOURCE_DIR}/deep_gemm/include"
    "${deepgemm_SOURCE_DIR}/third-party/cutlass/include"
    "${deepgemm_SOURCE_DIR}/third-party/cutlass/tools/util/include"
    "${deepgemm_SOURCE_DIR}/third-party/fmt/include")

  # Flags are gated on the C++ compile language so they only ever reach
  # C++ translation units of this target.
  target_compile_options(_deep_gemm_C PRIVATE
    $<$<COMPILE_LANGUAGE:CXX>:-std=c++17>
    $<$<COMPILE_LANGUAGE:CXX>:-O3>
    $<$<COMPILE_LANGUAGE:CXX>:-Wno-psabi>
    $<$<COMPILE_LANGUAGE:CXX>:-Wno-deprecated-declarations>)

  # torch_python is required because DeepGEMM uses pybind11 type casters
  # for at::Tensor (via PYBIND11_MODULE), unlike vLLM's own extensions which
  # use torch::Library custom ops.
  find_library(TORCH_PYTHON_LIBRARY torch_python
    PATHS "${TORCH_INSTALL_PREFIX}/lib"
    REQUIRED)

  target_link_libraries(_deep_gemm_C PRIVATE
    torch ${TORCH_LIBRARIES} "${TORCH_PYTHON_LIBRARY}"
    CUDA::cudart CUDA::nvrtc)

  # Install the shared library into the vendored package directory
  install(TARGETS _deep_gemm_C
    LIBRARY DESTINATION vllm/third_party/deep_gemm
    COMPONENT _deep_gemm_C)

  #
  # Vendor DeepGEMM Python package files
  #
  install(FILES
    "${deepgemm_SOURCE_DIR}/deep_gemm/__init__.py"
    DESTINATION vllm/third_party/deep_gemm
    COMPONENT _deep_gemm_C)

  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/utils/"
    DESTINATION vllm/third_party/deep_gemm/utils
    COMPONENT _deep_gemm_C
    FILES_MATCHING PATTERN "*.py")

  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/testing/"
    DESTINATION vllm/third_party/deep_gemm/testing
    COMPONENT _deep_gemm_C
    FILES_MATCHING PATTERN "*.py")

  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/legacy/"
    DESTINATION vllm/third_party/deep_gemm/legacy
    COMPONENT _deep_gemm_C
    FILES_MATCHING PATTERN "*.py")

  # Generate envs.py (normally generated by DeepGEMM's setup.py build step)
  file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/deep_gemm_envs.py"
    "# Pre-installed environment variables\npersistent_envs = dict()\n")
  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/deep_gemm_envs.py"
    DESTINATION vllm/third_party/deep_gemm
    RENAME envs.py
    COMPONENT _deep_gemm_C)

  #
  # Install include files needed for JIT compilation at runtime.
  # The JIT compiler finds these relative to the package directory.
  #

  # DeepGEMM's own CUDA headers
  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/include/"
    DESTINATION vllm/third_party/deep_gemm/include
    COMPONENT _deep_gemm_C)

  # CUTLASS and CuTe headers (vendored for JIT, separate from vLLM's CUTLASS)
  install(DIRECTORY "${deepgemm_SOURCE_DIR}/third-party/cutlass/include/"
    DESTINATION vllm/third_party/deep_gemm/include
    COMPONENT _deep_gemm_C)

else()
  message(STATUS "DeepGEMM will not compile: "
    "unsupported CUDA architecture ${CUDA_ARCHS}")
  # Create empty target so setup.py doesn't fail on unsupported systems
  add_custom_target(_deep_gemm_C)
endif()