[v1] Redo "Support multiple KV cache groups in GPU model runner (#17945)" (#18593)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
Chen Zhang
2025-05-24 00:39:47 +08:00
committed by GitHub
parent 9520a989df
commit 6550114c9c
15 changed files with 469 additions and 203 deletions

View File

@@ -4,6 +4,7 @@ import numpy as np
import torch
from vllm.logger import init_logger
from vllm.utils import cdiv
logger = init_logger(__name__)
@@ -96,3 +97,43 @@ class BlockTable:
def get_numpy_array(self) -> np.ndarray:
    """Expose the block table's backing numpy array (no copy)."""
    backing = self.block_table_np
    return backing
class MultiGroupBlockTable:
    """Container of per-KV-cache-group :class:`BlockTable` objects.

    Each mutating operation is fanned out to every group's table so the
    tables stay row-aligned with one another.

    NOTE: the constructor currently builds exactly one ``BlockTable``,
    i.e. a single KV cache group; the fan-out loops below already handle
    the multi-group case.
    """

    def __init__(self, max_num_reqs: int, max_model_len: int,
                 max_num_batched_tokens: int, pin_memory: bool,
                 device: torch.device, block_size: int) -> None:
        # One table sized to hold up to max_model_len tokens per request,
        # i.e. cdiv(max_model_len, block_size) blocks per row.
        max_blocks_per_req = cdiv(max_model_len, block_size)
        single_table = BlockTable(max_num_reqs, max_blocks_per_req,
                                  max_num_batched_tokens, pin_memory, device)
        self.block_tables = [single_table]

    def append_row(self, block_ids: list[list[int]], row_idx: int) -> None:
        """Append ``block_ids[g]`` to row ``row_idx`` of group ``g``'s table."""
        for group_idx, table in enumerate(self.block_tables):
            table.append_row(block_ids[group_idx], row_idx)

    def add_row(self, block_ids: list[list[int]], row_idx: int) -> None:
        """Write ``block_ids[g]`` as row ``row_idx`` of group ``g``'s table."""
        for group_idx, table in enumerate(self.block_tables):
            table.add_row(block_ids[group_idx], row_idx)

    def move_row(self, src: int, tgt: int) -> None:
        """Move row ``src`` to row ``tgt`` in every group's table."""
        for table in self.block_tables:
            table.move_row(src, tgt)

    def swap_row(self, src: int, tgt: int) -> None:
        """Swap rows ``src`` and ``tgt`` in every group's table."""
        for table in self.block_tables:
            table.swap_row(src, tgt)

    def commit(self, num_reqs: int) -> None:
        """Commit the first ``num_reqs`` rows in every group's table."""
        for table in self.block_tables:
            table.commit(num_reqs)

    def clear(self) -> None:
        """Reset every group's table."""
        for table in self.block_tables:
            table.clear()

    def __getitem__(self, idx: int) -> "BlockTable":
        """Return the BlockTable of the ``idx``-th KV cache group."""
        return self.block_tables[idx]