[Attention] Refactor AttentionMetadata Preparation for Encoder-only Models (#23154)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
Chen Zhang
2025-08-21 22:05:59 -07:00
committed by GitHub
parent 5964069367
commit 17373dcd93
12 changed files with 226 additions and 214 deletions

View File

@@ -203,6 +203,14 @@ class MambaSpec(KVCacheSpec):
return self.page_size_bytes
@dataclass(frozen=True)
class EncoderOnlyAttentionSpec(AttentionSpec):
    """Attention spec for encoder-only layers.

    Encoder-only attention never persists keys/values across steps, so
    these layers contribute nothing to the KV-cache memory budget.
    """

    def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int:
        """Return the KV-cache bytes required by this layer: always 0."""
        # Encoder-only layers allocate no KV cache, regardless of config.
        return 0
@dataclass
class KVCacheTensor:
"""