[Misc] Use vllm-flash-attn instead of flash-attn (#4686)
@@ -76,11 +76,12 @@ def _which_attn_to_use(dtype: torch.dtype) -> _Backend:
         return _Backend.XFORMERS
 
     try:
-        import flash_attn  # noqa: F401
+        import vllm_flash_attn  # noqa: F401
     except ImportError:
         logger.info(
-            "Cannot use FlashAttention-2 backend because the flash_attn "
-            "package is not found. Please install it for better performance.")
+            "Cannot use FlashAttention-2 backend because the vllm_flash_attn "
+            "package is not found. `pip install vllm-flash-attn` for better "
+            "performance.")
         return _Backend.XFORMERS
 
     backend_by_env_var = envs.VLLM_ATTENTION_BACKEND
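For context, the change keeps the existing fallback behavior: when the new vllm_flash_attn package cannot be imported, the backend selector logs a hint and still returns _Backend.XFORMERS. Below is a minimal, self-contained sketch of that import-probe pattern; the helper name pick_attention_backend and the plain string return values are illustrative assumptions, not vLLM's actual API.

import importlib.util


def pick_attention_backend() -> str:
    # Probe for the vllm-flash-attn wheel without importing it eagerly;
    # fall back to the xFormers backend when it is not installed.
    # (Hypothetical helper for illustration, not vLLM code.)
    if importlib.util.find_spec("vllm_flash_attn") is not None:
        return "FLASH_ATTN"
    return "XFORMERS"


if __name__ == "__main__":
    # Prints "FLASH_ATTN" only when `pip install vllm-flash-attn`
    # has been run in the current environment.
    print(pick_attention_backend())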