From 22482e495e00d409c9b5c78dade6e672ddf7fbc2 Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson
Date: Fri, 4 Oct 2024 11:43:15 -0400
Subject: [PATCH] [Bugfix] Flash attention arches not getting set properly (#9062)

---
 CMakeLists.txt | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8a6c1fb14..7b24c4abc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -482,6 +482,17 @@ if (NOT VLLM_TARGET_DEVICE STREQUAL "cuda")
     return()
 endif ()
 
+# vLLM flash attention requires VLLM_GPU_ARCHES to contain the set of target
+# arches in CMake syntax (75-real, 89-virtual, etc.). Because we clear the
+# arches in the CUDA case (and set the gencodes on a per-file basis instead),
+# VLLM_GPU_ARCHES has to be populated manually here from CUDA_ARCHS.
+if(VLLM_GPU_LANG STREQUAL "CUDA")
+  foreach(_ARCH ${CUDA_ARCHS})
+    string(REPLACE "." "" _ARCH "${_ARCH}")  # drop the dot, e.g. "8.6" -> "86"
+    list(APPEND VLLM_GPU_ARCHES "${_ARCH}-real")  # e.g. "86-real"
+  endforeach()
+endif()
+
 #
 # Build vLLM flash attention from source
 #