[Bugfix][Kernel] Give unique name to BlockSparseFlashAttention (#12040)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
2025-01-14 23:45:05 +08:00
parent 2e0e017610
commit a2d2acb4c8
2 changed files with 2 additions and 2 deletions
--- a/vllm/attention/backends/blocksparse_attn.py
+++ b/vllm/attention/backends/blocksparse_attn.py
@@ -89,8 +89,7 @@ class BlocksparseFlashAttentionBackend(AttentionBackend):

    @staticmethod
    def get_name() -> str:
-        # For attention layer compatibility
-        return "FLASH_ATTN"
+        return "BLOCK_SPARSE_FLASH_ATTN"

    @staticmethod
    def get_impl_cls() -> Type["BlocksparseFlashAttentionImpl"]:
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -33,6 +33,7 @@ class _Backend(enum.Enum):
    HPU_ATTN = enum.auto()
    PALLAS = enum.auto()
    IPEX = enum.auto()
+    BLOCK_SPARSE_FLASH_ATTN = enum.auto()
    NO_ATTENTION = enum.auto()