[Kernel] Build flash-attn from source (#8245)
@@ -244,8 +244,7 @@ def which_attn_to_use(
     # FlashAttn is valid for the model, checking if the package is installed.
     if selected_backend == _Backend.FLASH_ATTN:
         try:
-            import vllm_flash_attn  # noqa: F401
+            import vllm.vllm_flash_attn  # noqa: F401
             from vllm.attention.backends.flash_attn import (  # noqa: F401
                 FlashAttentionBackend)

@@ -258,8 +257,9 @@ def which_attn_to_use(
         except ImportError:
             logger.info(
                 "Cannot use FlashAttention-2 backend because the "
-                "vllm_flash_attn package is not found. "
-                "`pip install vllm-flash-attn` for better performance.")
+                "vllm.vllm_flash_attn package is not found. "
+                "Make sure that vllm_flash_attn was built and installed "
+                "(on by default).")
             selected_backend = _Backend.XFORMERS

     return selected_backend
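The two hunks above change backend selection to probe the in-tree vllm.vllm_flash_attn package (built from source together with vLLM) instead of the separately published vllm_flash_attn wheel, and to fall back to the XFormers backend when that import fails. Below is a minimal, self-contained sketch of that try/except fallback; the _Backend enum and logger here are simplified stand-ins for vLLM's own objects, and the function ignores the other arguments the real which_attn_to_use takes.

import enum
import logging

logger = logging.getLogger(__name__)


class _Backend(enum.Enum):
    # Simplified stand-in for vLLM's backend enum (assumed shape).
    FLASH_ATTN = enum.auto()
    XFORMERS = enum.auto()


def which_attn_to_use(selected_backend: _Backend) -> _Backend:
    # Sketch of the fallback in the diff: probe the in-tree
    # vllm.vllm_flash_attn package and drop to XFormers if it is missing.
    if selected_backend == _Backend.FLASH_ATTN:
        try:
            import vllm.vllm_flash_attn  # noqa: F401
        except ImportError:
            logger.info(
                "Cannot use FlashAttention-2 backend because the "
                "vllm.vllm_flash_attn package is not found. "
                "Make sure that vllm_flash_attn was built and installed "
                "(on by default).")
            selected_backend = _Backend.XFORMERS
    return selected_backend

A quick way to check that the from-source build produced the package is to run python -c "import vllm.vllm_flash_attn", which should exit silently when the install succeeded.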