[Misc] Refactor get_kv_cache_spec into AttentionLayerBase (#26587)

Signed-off-by: NickLucche <nlucches@redhat.com>
2025-10-18 15:51:21 +02:00
parent ab4be40fc5
commit b26b70bec4
10 changed files with 151 additions and 118 deletions
--- a/vllm/model_executor/layers/attention_layer_base.py
+++ b/vllm/model_executor/layers/attention_layer_base.py
@@ -5,6 +5,9 @@
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING

+from vllm.config import VllmConfig
+from vllm.v1.kv_cache_interface import KVCacheSpec
+
 if TYPE_CHECKING:
    from vllm.attention.backends.abstract import AttentionBackend

@@ -22,3 +25,11 @@ class AttentionLayerBase(ABC):
    def get_attn_backend(self) -> type["AttentionBackend"]:
        """Get the attention backend class for this layer."""
        pass
+
+    @abstractmethod
+    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:
+        """
+        Return the KV cache spec for this layer, given `vllm_config`.
+        Returns None if the layer does not need a KV cache.
+        """
+        pass