[Misc] Refactor get_kv_cache_spec into AttentionLayerBase (#26587)

Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
Nicolò Lucchesi
2025-10-18 15:51:21 +02:00
committed by GitHub
parent ab4be40fc5
commit b26b70bec4
10 changed files with 151 additions and 118 deletions

View File

@@ -5,6 +5,9 @@
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING
from vllm.config import VllmConfig
from vllm.v1.kv_cache_interface import KVCacheSpec
if TYPE_CHECKING:
from vllm.attention.backends.abstract import AttentionBackend
@@ -22,3 +25,11 @@ class AttentionLayerBase(ABC):
def get_attn_backend(self) -> type["AttentionBackend"]:
    """Return the AttentionBackend subclass this layer uses."""
    ...
@abstractmethod
def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:
    """Return the KV cache spec for this layer.

    Returns None when the layer does not need a KV cache.
    """
    ...