[CUDA graphs] Enable full CUDA graphs with FA3 AoT scheduling (#20301)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2025-07-01 09:07:36 -07:00
parent 314af8617c
commit 8acb4badee
2 changed files with 54 additions and 7 deletions
--- a/cmake/external_projects/vllm_flash_attn.cmake
+++ b/cmake/external_projects/vllm_flash_attn.cmake
@@ -38,7 +38,7 @@ else()
  FetchContent_Declare(
          vllm-flash-attn
          GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
-          GIT_TAG 5f3644181c7a15345ce20bfc65af117d3601b524
+          GIT_TAG 1c2624e53c078854e0637ee566c72fe2107e75f4
          GIT_PROGRESS TRUE
          # Don't share the vllm-flash-attn build between build types
          BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn