[BUGFIX][Mamba][Qwen3.5] Zero freed SSM cache blocks on GPU (#35219)
Signed-off-by: Vadim Gimpelson <vadim.gimpelson@gmail.com>
@@ -86,6 +86,26 @@ class AttentionBackend(ABC):
    ) -> tuple[int, ...]:
        raise NotImplementedError

    @classmethod
    def get_kv_cache_block_dim(
        cls,
        block_size: int,
        num_kv_heads: int,
        head_size: int,
        cache_dtype_str: str = "auto",
    ) -> int:
        """Discover which tensor dim is the block index, since different
        backends lay out dims differently."""
        _S = 1234567
        shape = cls.get_kv_cache_shape(
            _S,
            block_size,
            num_kv_heads,
            head_size,
            cache_dtype_str=cache_dtype_str,
        )
        return shape.index(_S)

    @staticmethod
    def get_kv_cache_stride_order(
        include_num_layers_dimension: bool = False,
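The sentinel probe in get_kv_cache_block_dim can be illustrated outside the diff: call get_kv_cache_shape with an unmistakable sentinel in the num_blocks slot and report where that sentinel lands in the returned shape. The sketch below is a standalone assumption for illustration only; DummyBackend and its (2, num_blocks, block_size, num_kv_heads, head_size) layout are hypothetical and not part of vLLM.

    # Hypothetical backend used only to demonstrate the sentinel-probe idea.
    class DummyBackend:
        @staticmethod
        def get_kv_cache_shape(
            num_blocks: int,
            block_size: int,
            num_kv_heads: int,
            head_size: int,
            cache_dtype_str: str = "auto",
        ) -> tuple[int, ...]:
            # This backend happens to put the block index in position 1.
            return (2, num_blocks, block_size, num_kv_heads, head_size)

        @classmethod
        def get_kv_cache_block_dim(
            cls,
            block_size: int,
            num_kv_heads: int,
            head_size: int,
            cache_dtype_str: str = "auto",
        ) -> int:
            # Probe the shape with a sentinel num_blocks and locate it.
            _S = 1234567
            shape = cls.get_kv_cache_shape(
                _S, block_size, num_kv_heads, head_size, cache_dtype_str=cache_dtype_str
            )
            return shape.index(_S)

    if __name__ == "__main__":
        # The sentinel 1234567 appears at index 1, so 1 is the block dimension.
        print(DummyBackend.get_kv_cache_block_dim(16, 8, 128))  # -> 1

Because each backend defines its own get_kv_cache_shape, this probe works for any layout without the caller hard-coding which axis holds the block index.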