[V0 Deprecation] Refactor kv cache from list to element (#37487)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Wentao Ye
2026-03-23 23:10:11 -04:00
committed by GitHub
parent de99d91ece
commit c59a132f96
27 changed files with 70 additions and 85 deletions

View File

@@ -258,8 +258,8 @@ def get_fake_execute_model_fn(original_execute_model_fn: Callable):
mamba_kv_cache_dict[
num_computed_tokens - num_computed_tokens % BLOCK_SIZE
] = (
-kv_cache[0][0][block_id].clone(),
-kv_cache[0][1][block_id].clone(),
+kv_cache[0][block_id].clone(),
+kv_cache[1][block_id].clone(),
)
last_num_computed_tokens = num_computed_tokens
@@ -302,7 +302,7 @@ def get_fake_process_mamba_fn(
mamba_layer_name = kv_cache_config.kv_cache_groups[
mamba_group_id
].layer_names[0]
-mamba_kv_cache = forward_context[mamba_layer_name].kv_cache[0][-1]
+mamba_kv_cache = forward_context[mamba_layer_name].kv_cache[-1]
mamba_block_table = input_batch.block_table.block_tables[
mamba_group_id
].block_table.cpu[0]