[V0 Deprecation] Refactor kv cache from list to element (#37487)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Wentao Ye
2026-03-23 23:10:11 -04:00
committed by GitHub
parent de99d91ece
commit c59a132f96
27 changed files with 70 additions and 85 deletions

View File

@@ -23,10 +23,10 @@ def test_bind_kv_cache(default_vllm_config):
}
runner_kv_caches: list[torch.Tensor] = []
bind_kv_cache(kv_cache, ctx, runner_kv_caches)
assert ctx["layers.0.self_attn"].kv_cache[0] is kv_cache["layers.0.self_attn"]
assert ctx["layers.1.self_attn"].kv_cache[0] is kv_cache["layers.1.self_attn"]
assert ctx["layers.2.self_attn"].kv_cache[0] is kv_cache["layers.2.self_attn"]
assert ctx["layers.3.self_attn"].kv_cache[0] is kv_cache["layers.3.self_attn"]
assert ctx["layers.0.self_attn"].kv_cache is kv_cache["layers.0.self_attn"]
assert ctx["layers.1.self_attn"].kv_cache is kv_cache["layers.1.self_attn"]
assert ctx["layers.2.self_attn"].kv_cache is kv_cache["layers.2.self_attn"]
assert ctx["layers.3.self_attn"].kv_cache is kv_cache["layers.3.self_attn"]
assert runner_kv_caches[0] is kv_cache["layers.0.self_attn"]
assert runner_kv_caches[1] is kv_cache["layers.1.self_attn"]
@@ -50,8 +50,8 @@ def test_bind_kv_cache_non_attention(default_vllm_config):
runner_kv_caches: list[torch.Tensor] = []
bind_kv_cache(kv_cache, ctx, runner_kv_caches)
assert ctx["model.layers.20.attn"].kv_cache[0] is kv_cache["model.layers.20.attn"]
assert ctx["model.layers.28.attn"].kv_cache[0] is kv_cache["model.layers.28.attn"]
assert ctx["model.layers.20.attn"].kv_cache is kv_cache["model.layers.20.attn"]
assert ctx["model.layers.28.attn"].kv_cache is kv_cache["model.layers.28.attn"]
assert runner_kv_caches[0] is kv_cache["model.layers.20.attn"]
assert runner_kv_caches[1] is kv_cache["model.layers.28.attn"]
@@ -74,14 +74,14 @@ def test_bind_kv_cache_draft_model(default_vllm_config):
runner_kv_caches: list[torch.Tensor] = []
bind_kv_cache(kv_cache, ctx, runner_kv_caches)
assert ctx["model.layers.0.attn"].kv_cache[0] is kv_cache["model.layers.0.attn"]
assert ctx["model.layers.1.attn"].kv_cache[0] is kv_cache["model.layers.1.attn"]
assert ctx["model.layers.0.attn"].kv_cache is kv_cache["model.layers.0.attn"]
assert ctx["model.layers.1.attn"].kv_cache is kv_cache["model.layers.1.attn"]
assert (
ctx["draft_model.layers.0.attn"].kv_cache[0]
ctx["draft_model.layers.0.attn"].kv_cache
is kv_cache["draft_model.layers.0.attn"]
)
assert (
ctx["draft_model.layers.1.attn"].kv_cache[0]
ctx["draft_model.layers.1.attn"].kv_cache
is kv_cache["draft_model.layers.1.attn"]
)