diff --git a/vllm/v1/attention/backends/gdn_attn.py b/vllm/v1/attention/backends/gdn_attn.py
index 426c17689..cc7e7844d 100644
--- a/vllm/v1/attention/backends/gdn_attn.py
+++ b/vllm/v1/attention/backends/gdn_attn.py
@@ -22,6 +22,10 @@ from vllm.v1.kv_cache_interface import AttentionSpec, MambaSpec
 
 
 class GDNAttentionBackend(AttentionBackend):
+    @staticmethod
+    def get_name() -> str:
+        return "GDN_ATTN"
+
     @staticmethod
     def get_builder_cls() -> type["GDNAttentionMetadataBuilder"]:
         return GDNAttentionMetadataBuilder
diff --git a/vllm/v1/attention/backends/linear_attn.py b/vllm/v1/attention/backends/linear_attn.py
index 4ef565691..6f9b84cd3 100644
--- a/vllm/v1/attention/backends/linear_attn.py
+++ b/vllm/v1/attention/backends/linear_attn.py
@@ -16,6 +16,10 @@ from vllm.v1.kv_cache_interface import AttentionSpec, MambaSpec
 
 
 class LinearAttentionBackend(AttentionBackend):
+    @staticmethod
+    def get_name() -> str:
+        return "LINEAR_ATTN"
+
     @staticmethod
     def get_builder_cls() -> type["LinearAttentionMetadataBuilder"]:
         return LinearAttentionMetadataBuilder
diff --git a/vllm/v1/attention/backends/mamba1_attn.py b/vllm/v1/attention/backends/mamba1_attn.py
index 9d4a37576..bf0c68b65 100644
--- a/vllm/v1/attention/backends/mamba1_attn.py
+++ b/vllm/v1/attention/backends/mamba1_attn.py
@@ -11,6 +11,10 @@ from vllm.v1.attention.backends.mamba_attn import (
 
 
 class Mamba1AttentionBackend(AttentionBackend):
+    @staticmethod
+    def get_name() -> str:
+        return "MAMBA1_ATTN"
+
     @staticmethod
     def get_builder_cls() -> type["Mamba1AttentionMetadataBuilder"]:
         return Mamba1AttentionMetadataBuilder
diff --git a/vllm/v1/attention/backends/mamba2_attn.py b/vllm/v1/attention/backends/mamba2_attn.py
index f45315f1e..dea615a64 100644
--- a/vllm/v1/attention/backends/mamba2_attn.py
+++ b/vllm/v1/attention/backends/mamba2_attn.py
@@ -7,7 +7,10 @@ import torch
 
 from vllm.config import VllmConfig
 from vllm.utils.math_utils import cdiv
-from vllm.v1.attention.backend import AttentionBackend, CommonAttentionMetadata
+from vllm.v1.attention.backend import (
+    AttentionBackend,
+    CommonAttentionMetadata,
+)
 from vllm.v1.attention.backends.mamba_attn import (
     BaseMambaAttentionMetadata,
     BaseMambaAttentionMetadataBuilder,
@@ -85,6 +88,10 @@ def compute_varlen_chunk_metadata(
 
 
 class Mamba2AttentionBackend(AttentionBackend):
+    @staticmethod
+    def get_name() -> str:
+        return "MAMBA2_ATTN"
+
     @staticmethod
     def get_builder_cls() -> type["Mamba2AttentionMetadataBuilder"]:
         return Mamba2AttentionMetadataBuilder
diff --git a/vllm/v1/attention/backends/mla/indexer.py b/vllm/v1/attention/backends/mla/indexer.py
index 363979b4a..8c1ea1646 100644
--- a/vllm/v1/attention/backends/mla/indexer.py
+++ b/vllm/v1/attention/backends/mla/indexer.py
@@ -25,6 +25,10 @@ logger = init_logger(__name__)
 
 
 class DeepseekV32IndexerBackend(AttentionBackend):
+    @staticmethod
+    def get_name() -> str:
+        return "DEEPSEEK_V32_INDEXER"
+
     @staticmethod
     def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
         return [1 if current_platform.is_rocm() else 64]
diff --git a/vllm/v1/attention/backends/short_conv_attn.py b/vllm/v1/attention/backends/short_conv_attn.py
index dc6b425ce..c6a8e6eea 100644
--- a/vllm/v1/attention/backends/short_conv_attn.py
+++ b/vllm/v1/attention/backends/short_conv_attn.py
@@ -10,6 +10,10 @@ from vllm.v1.attention.backends.mamba_attn import (
 
 
 class ShortConvAttentionBackend(AttentionBackend):
+    @staticmethod
+    def get_name() -> str:
+        return "SHORT_CONV_ATTN"
+
     @staticmethod
     def get_builder_cls() -> type["ShortConvAttentionMetadataBuilder"]:
         return ShortConvAttentionMetadataBuilder
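
The diff above gives each of these mamba-style backends a stable `get_name()` string. The snippet below is a minimal sketch of how such identifiers can be used to look a backend class up by name; the `BACKENDS` registry and `resolve_backend` helper are hypothetical illustrations, not vLLM's actual backend-selection code.

```python
# Hypothetical sketch: map the get_name() identifiers added in this diff to
# their backend classes. Illustrative only; not vLLM's real resolver.
from vllm.v1.attention.backends.linear_attn import LinearAttentionBackend
from vllm.v1.attention.backends.mamba1_attn import Mamba1AttentionBackend
from vllm.v1.attention.backends.mamba2_attn import Mamba2AttentionBackend

# Name -> class registry built from the new get_name() staticmethods.
BACKENDS = {
    cls.get_name(): cls
    for cls in (
        LinearAttentionBackend,
        Mamba1AttentionBackend,
        Mamba2AttentionBackend,
    )
}


def resolve_backend(name: str):
    """Return the backend class registered under `name`, e.g. "MAMBA2_ATTN"."""
    try:
        return BACKENDS[name]
    except KeyError as exc:
        raise ValueError(f"Unknown attention backend: {name!r}") from exc
```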