[NIXL][1/N] Refactor kernel_block_size detection (#35752)

Signed-off-by: NickLucche <nlucches@redhat.com>
2026-03-11 09:11:23 +01:00
parent a40ee486f2
commit 098d844731
5 changed files with 130 additions and 100 deletions
--- a/vllm/v1/worker/utils.py
+++ b/vllm/v1/worker/utils.py
@@ -258,7 +258,8 @@ class AttentionGroup:


 def select_common_block_size(
-    kv_manager_block_size: int, attn_groups: list[AttentionGroup]
+    kv_manager_block_size: int,
+    backends: list[type[AttentionBackend]],
 ) -> int:
    """
    Select a block size that is supported by all backends and is a factor of
@@ -269,7 +270,7 @@ def select_common_block_size(

    Args:
        kv_manager_block_size: Block size of KV cache.
-        attn_groups: List of attention groups.
+        backends: List of attention backend classes.

    Returns:
        The selected block size.
@@ -297,8 +298,6 @@ def select_common_block_size(
                return False
        return True

-    backends = [group.backend for group in attn_groups]
-
    # Case 1: if the block_size of kv cache manager is supported by all backends,
    # return it directly.
    if block_size_is_supported(backends, kv_manager_block_size):
@@ -356,8 +355,9 @@ def prepare_kernel_block_sizes(
        if isinstance(kv_cache_spec, AttentionSpec):
            # This is an attention backend that supports virtual block splitting.
            kv_manager_block_size = kv_cache_group.kv_cache_spec.block_size
+            group_backends = [g.backend for g in attn_groups[kv_cache_gid]]
            selected_kernel_size = select_common_block_size(
-                kv_manager_block_size, attn_groups[kv_cache_gid]
+                kv_manager_block_size, group_backends
            )
            kernel_block_sizes.append(selected_kernel_size)
        elif isinstance(kv_cache_spec, MambaSpec):