[Bugfix][Kernel] Fix CUDA 11.8 being broken by FA3 build (#12375)

Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
This commit is contained in:
Lucas Wilkinson
2025-01-24 10:27:59 -05:00
committed by GitHub
parent 3bb8e2c9a2
commit ab5bbf5ae3
6 changed files with 42 additions and 22 deletions

5
setup.py Normal file → Executable file
View File

@@ -598,7 +598,10 @@ if _is_hip():
if _is_cuda():
ext_modules.append(CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa2_C"))
ext_modules.append(CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa3_C"))
if envs.VLLM_USE_PRECOMPILED or get_nvcc_cuda_version() >= Version("12.0"):
# FA3 requires CUDA 12.0 or later
ext_modules.append(
CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa3_C"))
ext_modules.append(CMakeExtension(name="vllm.cumem_allocator"))
if _build_custom_ops():