# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Base class for attention-like layers."""

from abc import ABC, abstractmethod

from vllm.config import VllmConfig
from vllm.v1.attention.backend import AttentionBackend, AttentionImpl
from vllm.v1.kv_cache_interface import KVCacheSpec

class AttentionLayerBase(ABC):
    """
    Base class for attention-like layers (Attention, Mamba, etc.)
    that support the v1 engine.

    This provides a common interface for getting attention backends
    from different layer types.
    """

    # Backend-specific implementation object; concrete subclasses are
    # expected to assign this. Declared as a string annotation so the
    # class does not require AttentionImpl at annotation-evaluation time.
    impl: "AttentionImpl"

    @abstractmethod
    def get_attn_backend(self) -> type[AttentionBackend]:
        """Get the attention backend class for this layer."""
        ...

    @abstractmethod
    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:
        """
        Get the KV cache spec for this layer.

        May be None if the layer does not need KV cache.
        """
        ...
|