# vllm/model_executor/layers/attention_layer_base.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Base class for attention-like layers."""

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

from vllm.config import VllmConfig
from vllm.v1.kv_cache_interface import KVCacheSpec

if TYPE_CHECKING:
    from vllm.attention.backends.abstract import AttentionBackend


class AttentionLayerBase(ABC):
    """
    Abstract interface shared by attention-like layers (Attention, Mamba,
    etc.) that support the v1 engine.

    Subclasses must implement both abstract methods below, which gives
    callers one common way to obtain the attention backend and the KV cache
    spec regardless of the concrete layer type.
    """

    @abstractmethod
    def get_attn_backend(self) -> type["AttentionBackend"]:
        """Return the attention backend class used by this layer."""
        ...

    @abstractmethod
    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:
        """
        Return the KV cache spec for this layer.

        Args:
            vllm_config: The vLLM configuration supplied by the caller.

        Returns:
            The KV cache spec for this layer, or None if the layer does not
            need a KV cache.
        """
        ...
[Attention] Unify mamba and attention backend selection (#23171) Signed-off-by: Ayush Satyam <ayushsatyam146@gmail.com> 2025-08-25 14:39:36 +05:30			`# SPDX-License-Identifier: Apache-2.0`
			`# SPDX-FileCopyrightText: Copyright contributors to the vLLM project`
			`"""Base class for attention-like layers."""`
Convert formatting to use `ruff` instead of `yapf` + `isort` (#26247) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> 2025-10-05 15:06:22 +01:00
[Attention] Unify mamba and attention backend selection (#23171) Signed-off-by: Ayush Satyam <ayushsatyam146@gmail.com> 2025-08-25 14:39:36 +05:30			`from abc import ABC, abstractmethod`
			`from typing import TYPE_CHECKING`

[Misc] Refactor `get_kv_cache_spec` into `AttentionLayerBase` (#26587) Signed-off-by: NickLucche <nlucches@redhat.com> 2025-10-18 15:51:21 +02:00			`from vllm.config import VllmConfig`
			`from vllm.v1.kv_cache_interface import KVCacheSpec`

[Attention] Unify mamba and attention backend selection (#23171) Signed-off-by: Ayush Satyam <ayushsatyam146@gmail.com> 2025-08-25 14:39:36 +05:30			`if TYPE_CHECKING:`
			`from vllm.attention.backends.abstract import AttentionBackend`


			`class AttentionLayerBase(ABC):`
			`"""`
			`Base class for attention-like layers (Attention, Mamba, etc.)`
			`that support the v1 engine.`
Convert formatting to use `ruff` instead of `yapf` + `isort` (#26247) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> 2025-10-05 15:06:22 +01:00
[Attention] Unify mamba and attention backend selection (#23171) Signed-off-by: Ayush Satyam <ayushsatyam146@gmail.com> 2025-08-25 14:39:36 +05:30			`This provides a common interface for getting attention backends`
			`from different layer types.`
			`"""`

			`@abstractmethod`
			`def get_attn_backend(self) -> type["AttentionBackend"]:`
			`"""Get the attention backend class for this layer."""`
			`pass`
[Misc] Refactor `get_kv_cache_spec` into `AttentionLayerBase` (#26587) Signed-off-by: NickLucche <nlucches@redhat.com> 2025-10-18 15:51:21 +02:00
			`@abstractmethod`
			`def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:`
			`"""`
			`Get the KV cache spec for this layer.`
			`May be None if the layer does not need KV cache.`
			`"""`
			`pass`