[Model][Mamba] Add selector for mamba attention backend and make it pluggable for other device (#26487)

Signed-off-by: shen-shanshan <467638484@qq.com>
This commit is contained in:
Shanshan Shen
2025-11-20 00:24:55 +08:00
committed by GitHub
parent 48fc8b1e59
commit d44e9df7d4
12 changed files with 144 additions and 85 deletions

View File

@@ -1,10 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from vllm.attention.backends.abstract import AttentionBackend
import torch
from torch import nn
@@ -908,11 +904,6 @@ class MambaMixer2(MambaBase, CustomOp):
def mamba_type(self) -> str:
    """Identify this mixer's mamba variant; used by backend selection."""
    kind = "mamba2"
    return kind
def get_attn_backend(self) -> type["AttentionBackend"]:
    """Return the attention backend class for Mamba2 layers.

    The import is deferred to call time to avoid a module-level
    dependency cycle with the v1 attention backends package.
    """
    from vllm.v1.attention.backends.mamba2_attn import (
        Mamba2AttentionBackend,
    )

    backend_cls = Mamba2AttentionBackend
    return backend_cls
def mamba_mixer2(
projected_states: torch.Tensor,