[FlashMLA] Update FlashMLA to expose new arguments (#32810)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
@@ -19,7 +19,7 @@ else()
|
||||
FetchContent_Declare(
|
||||
flashmla
|
||||
GIT_REPOSITORY https://github.com/vllm-project/FlashMLA
|
||||
GIT_TAG 526781394b33d9888e4c41952e692266267dd8bf
|
||||
GIT_TAG c2afa9cb93e674d5a9120a170a6da57b89267208
|
||||
GIT_PROGRESS TRUE
|
||||
CONFIGURE_COMMAND ""
|
||||
BUILD_COMMAND ""
|
||||
@@ -30,6 +30,24 @@ endif()
|
||||
FetchContent_MakeAvailable(flashmla)
|
||||
message(STATUS "FlashMLA is available at ${flashmla_SOURCE_DIR}")
|
||||
|
||||
# Vendor FlashMLA interface into vLLM with torch-ops shim.
|
||||
set(FLASHMLA_VENDOR_DIR "${CMAKE_SOURCE_DIR}/vllm/third_party/flashmla")
|
||||
file(MAKE_DIRECTORY "${FLASHMLA_VENDOR_DIR}")
|
||||
file(READ "${flashmla_SOURCE_DIR}/flash_mla/flash_mla_interface.py"
|
||||
FLASHMLA_INTERFACE_CONTENT)
|
||||
string(REPLACE "import flash_mla.cuda as flash_mla_cuda"
|
||||
"import vllm._flashmla_C\nflash_mla_cuda = torch.ops._flashmla_C"
|
||||
FLASHMLA_INTERFACE_CONTENT
|
||||
"${FLASHMLA_INTERFACE_CONTENT}")
|
||||
file(WRITE "${FLASHMLA_VENDOR_DIR}/flash_mla_interface.py"
|
||||
"${FLASHMLA_INTERFACE_CONTENT}")
|
||||
|
||||
# Install the generated flash_mla_interface.py to the wheel
|
||||
# Use COMPONENT _flashmla_C to ensure it's installed with the C extension
|
||||
install(FILES "${FLASHMLA_VENDOR_DIR}/flash_mla_interface.py"
|
||||
DESTINATION vllm/third_party/flashmla/
|
||||
COMPONENT _flashmla_C)
|
||||
|
||||
# The FlashMLA kernels only work on hopper and require CUDA 12.3 or later.
|
||||
# Only build FlashMLA kernels if we are building for something compatible with
|
||||
# sm90a
|
||||
@@ -79,7 +97,6 @@ if(FLASH_MLA_ARCHS)
|
||||
|
||||
# sm100 dense prefill & backward
|
||||
${flashmla_SOURCE_DIR}/csrc/sm100/prefill/dense/fmha_cutlass_fwd_sm100.cu
|
||||
${flashmla_SOURCE_DIR}/csrc/sm100/prefill/dense/fmha_cutlass_bwd_sm100.cu
|
||||
|
||||
# sm100 sparse prefill
|
||||
${flashmla_SOURCE_DIR}/csrc/sm100/prefill/sparse/fwd/head64/instantiations/phase1_k512.cu
|
||||
|
||||
Reference in New Issue
Block a user