[Misc] Use vllm-flash-attn instead of flash-attn (#4686)

Woosuk Kwon
2024-05-08 13:15:34 -07:00
committed by GitHub
parent 230c4b38c1
commit 89579a201f
6 changed files with 16 additions and 31 deletions

@@ -76,11 +76,12 @@ def _which_attn_to_use(dtype: torch.dtype) -> _Backend:
         return _Backend.XFORMERS
 
     try:
-        import flash_attn  # noqa: F401
+        import vllm_flash_attn  # noqa: F401
     except ImportError:
         logger.info(
-            "Cannot use FlashAttention-2 backend because the flash_attn "
-            "package is not found. Please install it for better performance.")
+            "Cannot use FlashAttention-2 backend because the vllm_flash_attn "
+            "package is not found. `pip install vllm-flash-attn` for better "
+            "performance.")
         return _Backend.XFORMERS
 
     backend_by_env_var = envs.VLLM_ATTENTION_BACKEND
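
For readers outside the vLLM codebase, the change boils down to the same try/except import probe, now targeting the vllm_flash_attn module shipped by the vllm-flash-attn wheel. Below is a minimal standalone sketch of that pattern; pick_attention_backend is a hypothetical helper for illustration, not part of vLLM's API, and the backend names simply mirror the ones visible in the diff.

def pick_attention_backend() -> str:
    # Prefer the vLLM-specific FlashAttention-2 build; fall back to xFormers
    # when the package is not installed, mirroring the logic in the diff above.
    try:
        import vllm_flash_attn  # noqa: F401
    except ImportError:
        return "XFORMERS"
    return "FLASH_ATTN"


if __name__ == "__main__":
    # Prints "FLASH_ATTN" if `pip install vllm-flash-attn` succeeded,
    # otherwise "XFORMERS".
    print(pick_attention_backend())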