[v1] Move block management logic from KVCacheManager to SpecializedManager (#17474)
Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
@@ -8,6 +8,14 @@ from vllm.v1.core.specialized_manager import SlidingWindowManager
|
||||
from vllm.v1.kv_cache_interface import SlidingWindowSpec
|
||||
|
||||
|
||||
def get_sliding_window_manager(sliding_window_spec, block_pool):
|
||||
return SlidingWindowManager(sliding_window_spec,
|
||||
block_pool,
|
||||
use_eagle=False,
|
||||
num_kv_cache_groups=1,
|
||||
caching_hash_fn=lambda x: x)
|
||||
|
||||
|
||||
def test_sliding_window_possible_cached_prefix():
|
||||
sliding_window_spec = SlidingWindowSpec(
|
||||
block_size=2,
|
||||
@@ -19,9 +27,7 @@ def test_sliding_window_possible_cached_prefix():
|
||||
)
|
||||
|
||||
block_pool = BlockPool(num_gpu_blocks=100, enable_caching=True)
|
||||
manager = SlidingWindowManager(sliding_window_spec,
|
||||
block_pool,
|
||||
use_eagle=False)
|
||||
manager = get_sliding_window_manager(sliding_window_spec, block_pool)
|
||||
|
||||
def run_one_case(block_is_cached, expect_length):
|
||||
block_hash_list = [
|
||||
@@ -81,9 +87,7 @@ def test_sliding_window_remove_skipped_blocks():
|
||||
|
||||
block_pool = BlockPool(num_gpu_blocks=2000, enable_caching=True)
|
||||
|
||||
manager = SlidingWindowManager(sliding_window_spec,
|
||||
block_pool,
|
||||
use_eagle=False)
|
||||
manager = get_sliding_window_manager(sliding_window_spec, block_pool)
|
||||
|
||||
null_block_id = block_pool.null_block.block_id
|
||||
|
||||
@@ -104,39 +108,35 @@ def test_sliding_window_remove_skipped_blocks():
|
||||
1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010
|
||||
]
|
||||
block_table = id_to_block_table(original_block_ids)
|
||||
removed = manager.remove_skipped_blocks(block_table, 0)
|
||||
assert_block_id(removed, [])
|
||||
manager.req_to_blocks["test"] = block_table
|
||||
|
||||
manager.remove_skipped_blocks("test", 0)
|
||||
assert_block_id(block_table, original_block_ids)
|
||||
|
||||
# 4 tokens are computed. Only token 0 is out of the sliding window. As
|
||||
# block 1000 also contains token 1 that is in the sliding window, block 1000
|
||||
# cannot be removed.
|
||||
removed = manager.remove_skipped_blocks(block_table, 4)
|
||||
assert_block_id(removed, [])
|
||||
manager.remove_skipped_blocks("test", 4)
|
||||
assert_block_id(block_table, original_block_ids)
|
||||
|
||||
# 5 tokens are computed. Token 0 & 1 are out of the sliding window.
|
||||
# Block 1000 can be removed.
|
||||
removed = manager.remove_skipped_blocks(block_table, 5)
|
||||
assert_block_id(removed, [original_block_ids[0]])
|
||||
manager.remove_skipped_blocks("test", 5)
|
||||
assert_block_id(block_table, [null_block_id] + original_block_ids[1:])
|
||||
|
||||
# 6 tokens are computed. Token 0-2 are out of the sliding window.
|
||||
# Cannot remove new block as the block 1001 is still used by token 3.
|
||||
removed = manager.remove_skipped_blocks(block_table, 6)
|
||||
assert_block_id(removed, [])
|
||||
manager.remove_skipped_blocks("test", 6)
|
||||
assert_block_id(block_table, [null_block_id] + original_block_ids[1:])
|
||||
|
||||
# 7 tokens are computed. Token 0-3 are out of the sliding window.
|
||||
# Block 1001 can be removed and block 1000 is already removed.
|
||||
removed = manager.remove_skipped_blocks(block_table, 7)
|
||||
assert_block_id(removed, [original_block_ids[1]])
|
||||
manager.remove_skipped_blocks("test", 7)
|
||||
assert_block_id(block_table, [null_block_id] * 2 + original_block_ids[2:])
|
||||
|
||||
# 11 tokens are computed. Token 0-7 are out of the sliding window.
|
||||
# Block 1002 & 1003 can be removed now. Block 1003 represents a longer
|
||||
# sequence, and is expected to be evicted earlier than 1002, so the order
|
||||
# of removed blocks should be [1003, 1002].
|
||||
removed = manager.remove_skipped_blocks(block_table, 11)
|
||||
assert_block_id(removed, [original_block_ids[3], original_block_ids[2]])
|
||||
manager.remove_skipped_blocks("test", 11)
|
||||
assert_block_id(block_table, [null_block_id] * 4 + original_block_ids[4:])
|
||||
|
||||
Reference in New Issue
Block a user