[BugFix] Fix vllm_flash_attn install issues (#17267)

Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: Aaron Pham <contact@aarnphm.xyz>
Author: Lucas Wilkinson
Date: 2025-04-27 20:27:56 -04:00
Committed by: GitHub
Parent: 20e489eaa1
Commit: d8bccde686
11 changed files with 28 additions and 284 deletions

@@ -1377,7 +1377,7 @@ class EngineArgs:
             ) or envs.VLLM_ATTENTION_BACKEND == "FLASH_ATTN_VLLM_V1"
             supported = False
             if fp8_attention and will_use_fa:
-                from vllm.vllm_flash_attn.fa_utils import (
+                from vllm.attention.utils.fa_utils import (
                     flash_attn_supports_fp8)
                 supported = flash_attn_supports_fp8()
             if not supported:
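
For context, a minimal sketch of how the relocated helper is consumed. It mirrors the hunk above only; the wrapper function name and the boolean parameters `fp8_attention` / `will_use_fa` are illustrative placeholders, not the full `EngineArgs` logic:

```python
# Sketch only: follows the import path introduced by this fix.
from vllm.attention.utils.fa_utils import flash_attn_supports_fp8


def fp8_flash_attn_supported(fp8_attention: bool, will_use_fa: bool) -> bool:
    """Return True only if the FlashAttention backend will be used and the
    installed vllm_flash_attn build reports FP8 support."""
    supported = False
    if fp8_attention and will_use_fa:
        # The helper now lives under vllm.attention.utils, so this check no
        # longer imports from the vllm_flash_attn package directly.
        supported = flash_attn_supports_fp8()
    return supported
```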