[Attention][DCP] Support DCP with query length > 1 (MTP) with FA3 (#25049)

Signed-off-by: Ming Yang <minos.future@gmail.com>
This commit is contained in:
Ming Yang
2025-10-09 08:06:29 -07:00
committed by GitHub
parent 2c1c7dfb35
commit 3b736e1c38
8 changed files with 45 additions and 13 deletions

View File

@@ -38,7 +38,7 @@ else()
FetchContent_Declare(
vllm-flash-attn
GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
GIT_TAG 4695e6bed5366c41e28c06cd86170166e4f43d00
GIT_TAG 8f468e7da54a8e2f98abfa7c38636aac91c0cba1
GIT_PROGRESS TRUE
# Don't share the vllm-flash-attn build between build types
BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn