[v1] Pass BlockTable and KVCacheSpec to AttentionMetadataBuilders (#17483)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
2025-05-11 07:12:04 +08:00
parent 4c31218f80
commit 950751a987
11 changed files with 132 additions and 68 deletions
--- a/tests/v1/worker/test_gpu_input_batch.py
+++ b/tests/v1/worker/test_gpu_input_batch.py
@@ -221,6 +221,7 @@ def test_sampling_metadata_in_input_batch(device: str, batch_size: int):
        max_num_reqs=batch_size,
        max_model_len=1024,
        max_num_blocks_per_req=10,
+        max_num_batched_tokens=1024,
        device=torch.device(device),
        pin_memory=is_pin_memory_available(),
        vocab_size=1024,
@@ -310,6 +311,7 @@ def test_swap_states_in_input_batch(device: str, batch_size: int,
        max_num_reqs=batch_size,
        max_model_len=1024,
        max_num_blocks_per_req=10,
+        max_num_batched_tokens=1024,
        device=torch.device(device),
        pin_memory=is_pin_memory_available(),
        vocab_size=1024,
@@ -318,6 +320,7 @@ def test_swap_states_in_input_batch(device: str, batch_size: int,
        max_num_reqs=batch_size,
        max_model_len=1024,
        max_num_blocks_per_req=10,
+        max_num_batched_tokens=1024,
        device=torch.device(device),
        pin_memory=is_pin_memory_available(),
        vocab_size=1024,