[V0 Deprecation] Refactor kv cache from list to element (#37487)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
@@ -23,10 +23,10 @@ def test_bind_kv_cache(default_vllm_config):
|
||||
}
|
||||
runner_kv_caches: list[torch.Tensor] = []
|
||||
bind_kv_cache(kv_cache, ctx, runner_kv_caches)
|
||||
- assert ctx["layers.0.self_attn"].kv_cache[0] is kv_cache["layers.0.self_attn"]
|
||||
- assert ctx["layers.1.self_attn"].kv_cache[0] is kv_cache["layers.1.self_attn"]
|
||||
- assert ctx["layers.2.self_attn"].kv_cache[0] is kv_cache["layers.2.self_attn"]
|
||||
- assert ctx["layers.3.self_attn"].kv_cache[0] is kv_cache["layers.3.self_attn"]
|
||||
+ assert ctx["layers.0.self_attn"].kv_cache is kv_cache["layers.0.self_attn"]
|
||||
+ assert ctx["layers.1.self_attn"].kv_cache is kv_cache["layers.1.self_attn"]
|
||||
+ assert ctx["layers.2.self_attn"].kv_cache is kv_cache["layers.2.self_attn"]
|
||||
+ assert ctx["layers.3.self_attn"].kv_cache is kv_cache["layers.3.self_attn"]
|
||||
|
||||
assert runner_kv_caches[0] is kv_cache["layers.0.self_attn"]
|
||||
assert runner_kv_caches[1] is kv_cache["layers.1.self_attn"]
|
||||
@@ -50,8 +50,8 @@ def test_bind_kv_cache_non_attention(default_vllm_config):
|
||||
runner_kv_caches: list[torch.Tensor] = []
|
||||
bind_kv_cache(kv_cache, ctx, runner_kv_caches)
|
||||
|
||||
- assert ctx["model.layers.20.attn"].kv_cache[0] is kv_cache["model.layers.20.attn"]
|
||||
- assert ctx["model.layers.28.attn"].kv_cache[0] is kv_cache["model.layers.28.attn"]
|
||||
+ assert ctx["model.layers.20.attn"].kv_cache is kv_cache["model.layers.20.attn"]
|
||||
+ assert ctx["model.layers.28.attn"].kv_cache is kv_cache["model.layers.28.attn"]
|
||||
|
||||
assert runner_kv_caches[0] is kv_cache["model.layers.20.attn"]
|
||||
assert runner_kv_caches[1] is kv_cache["model.layers.28.attn"]
|
||||
@@ -74,14 +74,14 @@ def test_bind_kv_cache_draft_model(default_vllm_config):
|
||||
runner_kv_caches: list[torch.Tensor] = []
|
||||
bind_kv_cache(kv_cache, ctx, runner_kv_caches)
|
||||
|
||||
- assert ctx["model.layers.0.attn"].kv_cache[0] is kv_cache["model.layers.0.attn"]
|
||||
- assert ctx["model.layers.1.attn"].kv_cache[0] is kv_cache["model.layers.1.attn"]
|
||||
+ assert ctx["model.layers.0.attn"].kv_cache is kv_cache["model.layers.0.attn"]
|
||||
+ assert ctx["model.layers.1.attn"].kv_cache is kv_cache["model.layers.1.attn"]
|
||||
assert (
|
||||
- ctx["draft_model.layers.0.attn"].kv_cache[0]
|
||||
+ ctx["draft_model.layers.0.attn"].kv_cache
|
||||
is kv_cache["draft_model.layers.0.attn"]
|
||||
)
|
||||
assert (
|
||||
- ctx["draft_model.layers.1.attn"].kv_cache[0]
|
||||
+ ctx["draft_model.layers.1.attn"].kv_cache
|
||||
is kv_cache["draft_model.layers.1.attn"]
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user