[V0 Deprecation] Refactor kv cache from list to element (#37487)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
2026-03-23 23:10:11 -04:00
parent de99d91ece
commit c59a132f96
27 changed files with 70 additions and 85 deletions
--- a/tests/v1/worker/test_utils.py
+++ b/tests/v1/worker/test_utils.py
@@ -23,10 +23,10 @@ def test_bind_kv_cache(default_vllm_config):
    }
    runner_kv_caches: list[torch.Tensor] = []
    bind_kv_cache(kv_cache, ctx, runner_kv_caches)
-    assert ctx["layers.0.self_attn"].kv_cache[0] is kv_cache["layers.0.self_attn"]
-    assert ctx["layers.1.self_attn"].kv_cache[0] is kv_cache["layers.1.self_attn"]
-    assert ctx["layers.2.self_attn"].kv_cache[0] is kv_cache["layers.2.self_attn"]
-    assert ctx["layers.3.self_attn"].kv_cache[0] is kv_cache["layers.3.self_attn"]
+    assert ctx["layers.0.self_attn"].kv_cache is kv_cache["layers.0.self_attn"]
+    assert ctx["layers.1.self_attn"].kv_cache is kv_cache["layers.1.self_attn"]
+    assert ctx["layers.2.self_attn"].kv_cache is kv_cache["layers.2.self_attn"]
+    assert ctx["layers.3.self_attn"].kv_cache is kv_cache["layers.3.self_attn"]

    assert runner_kv_caches[0] is kv_cache["layers.0.self_attn"]
    assert runner_kv_caches[1] is kv_cache["layers.1.self_attn"]
@@ -50,8 +50,8 @@ def test_bind_kv_cache_non_attention(default_vllm_config):
    runner_kv_caches: list[torch.Tensor] = []
    bind_kv_cache(kv_cache, ctx, runner_kv_caches)

-    assert ctx["model.layers.20.attn"].kv_cache[0] is kv_cache["model.layers.20.attn"]
-    assert ctx["model.layers.28.attn"].kv_cache[0] is kv_cache["model.layers.28.attn"]
+    assert ctx["model.layers.20.attn"].kv_cache is kv_cache["model.layers.20.attn"]
+    assert ctx["model.layers.28.attn"].kv_cache is kv_cache["model.layers.28.attn"]

    assert runner_kv_caches[0] is kv_cache["model.layers.20.attn"]
    assert runner_kv_caches[1] is kv_cache["model.layers.28.attn"]
@@ -74,14 +74,14 @@ def test_bind_kv_cache_draft_model(default_vllm_config):
    runner_kv_caches: list[torch.Tensor] = []
    bind_kv_cache(kv_cache, ctx, runner_kv_caches)

-    assert ctx["model.layers.0.attn"].kv_cache[0] is kv_cache["model.layers.0.attn"]
-    assert ctx["model.layers.1.attn"].kv_cache[0] is kv_cache["model.layers.1.attn"]
+    assert ctx["model.layers.0.attn"].kv_cache is kv_cache["model.layers.0.attn"]
+    assert ctx["model.layers.1.attn"].kv_cache is kv_cache["model.layers.1.attn"]
    assert (
-        ctx["draft_model.layers.0.attn"].kv_cache[0]
+        ctx["draft_model.layers.0.attn"].kv_cache
        is kv_cache["draft_model.layers.0.attn"]
    )
    assert (
-        ctx["draft_model.layers.1.attn"].kv_cache[0]
+        ctx["draft_model.layers.1.attn"].kv_cache
        is kv_cache["draft_model.layers.1.attn"]
    )