[Attention] Unify mamba and attention backend selection (#23171)

Signed-off-by: Ayush Satyam <ayushsatyam146@gmail.com>
2025-08-25 14:39:36 +05:30
parent d0a4a3f645
commit 5c4b6e66fe
11 changed files with 186 additions and 72 deletions
--- a/vllm/model_executor/layers/attention_layer_base.py
+++ b/vllm/model_executor/layers/attention_layer_base.py
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Base class for attention-like layers."""
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from vllm.attention.backends.abstract import AttentionBackend
+
+
+class AttentionLayerBase(ABC):
+    """
+    Base class for attention-like layers (Attention, Mamba, etc.) 
+    that support the v1 engine.
+    
+    This provides a common interface for getting attention backends 
+    from different layer types.
+    """
+
+    @abstractmethod
+    def get_attn_backend(self) -> type["AttentionBackend"]:
+        """Get the attention backend class for this layer."""
+        pass