[NIXL][1/N] Refactor kernel_block_size detection (#35752)

Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
Nicolò Lucchesi
2026-03-11 09:11:23 +01:00
committed by GitHub
parent a40ee486f2
commit 098d844731
5 changed files with 130 additions and 100 deletions

View File

@@ -258,7 +258,8 @@ class AttentionGroup:
def select_common_block_size(
kv_manager_block_size: int, attn_groups: list[AttentionGroup]
kv_manager_block_size: int,
backends: list[type[AttentionBackend]],
) -> int:
"""
Select a block size that is supported by all backends and is a factor of
@@ -269,7 +270,7 @@ def select_common_block_size(
Args:
kv_manager_block_size: Block size of KV cache.
attn_groups: List of attention groups.
backends: List of attention backend classes.
Returns:
The selected block size.
@@ -297,8 +298,6 @@ def select_common_block_size(
return False
return True
backends = [group.backend for group in attn_groups]
# Case 1: if the block_size of kv cache manager is supported by all backends,
# return it directly.
if block_size_is_supported(backends, kv_manager_block_size):
@@ -356,8 +355,9 @@ def prepare_kernel_block_sizes(
if isinstance(kv_cache_spec, AttentionSpec):
# This is an attention backend that supports virtual block splitting.
kv_manager_block_size = kv_cache_group.kv_cache_spec.block_size
group_backends = [g.backend for g in attn_groups[kv_cache_gid]]
selected_kernel_size = select_common_block_size(
kv_manager_block_size, attn_groups[kv_cache_gid]
kv_manager_block_size, group_backends
)
kernel_block_sizes.append(selected_kernel_size)
elif isinstance(kv_cache_spec, MambaSpec):