[Model][Mamba] Add selector for mamba attention backend and make it pluggable for other devices (#26487)

Signed-off-by: shen-shanshan <467638484@qq.com>
2025-11-20 00:24:55 +08:00
parent 48fc8b1e59
commit d44e9df7d4
12 changed files with 144 additions and 85 deletions
--- a/vllm/model_executor/layers/mamba/short_conv.py
+++ b/vllm/model_executor/layers/mamba/short_conv.py
@@ -1,10 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from vllm.attention.backends.abstract import AttentionBackend

 import torch

@@ -232,11 +228,6 @@ class ShortConv(MambaBase, CustomOp):
    def mamba_type(self) -> str:
        return "short_conv"

-    def get_attn_backend(self) -> type["AttentionBackend"]:
-        from vllm.v1.attention.backends.short_conv_attn import ShortConvAttentionBackend
-
-        return ShortConvAttentionBackend
-

 def short_conv(
    hidden_states: torch.Tensor,