# NOTE: this file currently exists only so that CMake-based IDEs (e.g. CLion) can index the code;
# the real compilation is done via JIT.

# find_package(CUDAToolkit) below needs the FindCUDAToolkit module, which first shipped in CMake 3.17.
cmake_minimum_required(VERSION 3.17)
project(deep_gemm LANGUAGES CXX CUDA)

set(CMAKE_VERBOSE_MAKEFILE ON)

# Host compiler flags.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -fPIC -Wno-psabi")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fPIC -Wno-psabi")

# Settings for the legacy FindCUDA macros (see cuda_add_library at the bottom of this file).
set(CUDA_SEPARABLE_COMPILATION ON)
list(APPEND CUDA_NVCC_FLAGS "-DENABLE_FAST_DEBUG")
list(APPEND CUDA_NVCC_FLAGS "-O3")
list(APPEND CUDA_NVCC_FLAGS "--ptxas-options=--verbose,--register-usage-level=10,--warn-on-local-memory-usage")

# Not referenced again in this file; presumably consumed by Torch's CMake package - TODO confirm.
set(USE_SYSTEM_NVTX on)

# User-overridable architecture list; mirrored into TORCH_CUDA_ARCH_LIST before Torch is located.
set(CUDA_ARCH_LIST "9.0" CACHE STRING "List of CUDA architectures to compile")
set(TORCH_CUDA_ARCH_LIST "${CUDA_ARCH_LIST}")

find_package(CUDAToolkit REQUIRED)
find_package(pybind11 REQUIRED)
find_package(Torch REQUIRED)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 17)

# Project headers and vendored third-party headers.
include_directories(
    deep_gemm/include
    third-party/cutlass/include
    third-party/cutlass/tools/util/include
    third-party/fmt/include
)

# NOTE(review): CUDA_TOOLKIT_ROOT_DIR is a legacy FindCUDA variable that this file never sets itself;
# it is presumably provided by Torch's package config - TODO confirm.
# CUDAToolkit_INCLUDE_DIRS (from find_package(CUDAToolkit)) is added so the CUDA headers are still
# found when the hard-coded x86_64-linux target directory does not exist.
include_directories(
    ${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/include
    ${CUDAToolkit_INCLUDE_DIRS}
    ${TORCH_INCLUDE_DIRS}
    ${PYTHON_INCLUDE_DIRS}
)
link_directories(
    ${TORCH_INSTALL_PREFIX}/lib
    ${CUDA_TOOLKIT_ROOT_DIR}/lib64
    ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs
)

# The main Python API entry point
pybind11_add_module(deep_gemm_cpp csrc/python_api.cpp)
target_link_libraries(deep_gemm_cpp PRIVATE ${TORCH_LIBRARIES} torch_python cuda)

# Enable kernel code indexing with CMake-based IDEs.
# NOTE(review): cuda_add_library is a legacy FindCUDA macro; no find_package(CUDA) appears in this
# file, so it is presumably made available by Torch's package config - TODO confirm.
cuda_add_library(deep_gemm_indexing_cuda STATIC csrc/indexing/main.cu)