[V0 Deprecation] Refactor kv cache from list to element (#37487)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Wentao Ye
2026-03-23 23:10:11 -04:00
committed by GitHub
parent de99d91ece
commit c59a132f96
27 changed files with 70 additions and 85 deletions

View File

@@ -481,13 +481,9 @@ class AiterFlashAttentionMetadataBuilder(
):
layers = get_layers_from_vllm_config(self.vllm_config, Attention)
first_layer_name = [k for k in layers][0]
kv_cache_shape = (
self.vllm_config.compilation_config.static_forward_context[
first_layer_name
]
.kv_cache[0]
.shape
)
kv_cache_shape = self.vllm_config.compilation_config.static_forward_context[
first_layer_name
].kv_cache.shape
num_blocks = kv_cache_shape[1]
self.scale = torch.ones(
[num_blocks, self.num_heads_kv, self.block_size],