Files
vllm/vllm/v1/attention/backends/short_conv_attn.py
Chendi.Xue 3b1dbaad4e [HMA]Fix corner case when hybrid page_size can not be evenly divided issue (blk_size=64,tp=4) (#37467)
Signed-off-by: Chendi Xue <chendi.xue@intel.com>
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Signed-off-by: Chendi.Xue <chendi.xue@intel.com>
Co-authored-by: Matthew Bonanni <mbonanni@redhat.com>
Co-authored-by: Nicolò Lucchesi <nlucches@redhat.com>
2026-03-30 16:47:30 +00:00

35 lines
902 B
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from dataclasses import dataclass
from vllm.v1.attention.backend import AttentionBackend
from vllm.v1.attention.backends.mamba_attn import (
BaseMambaAttentionMetadata,
BaseMambaAttentionMetadataBuilder,
)
class ShortConvAttentionBackend(AttentionBackend):
@staticmethod
def get_name() -> str:
return "SHORT_CONV_ATTN"
@staticmethod
def get_builder_cls() -> type["ShortConvAttentionMetadataBuilder"]:
return ShortConvAttentionMetadataBuilder
@classmethod
def is_ssm(cls) -> bool:
return True
@dataclass
class ShortConvAttentionMetadata(BaseMambaAttentionMetadata):
pass
class ShortConvAttentionMetadataBuilder(
BaseMambaAttentionMetadataBuilder[ShortConvAttentionMetadata]
):
metadata_cls = ShortConvAttentionMetadata