[V1] Move KV block hashes from Request to KVCacheManager (#12922)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2025-02-07 19:14:10 -08:00
parent b21f0f9d17
commit 3243158336
4 changed files with 35 additions and 31 deletions
--- a/vllm/v1/request.py
+++ b/vllm/v1/request.py
@@ -12,7 +12,6 @@ from vllm.v1.utils import ConstantList
 if TYPE_CHECKING:
    from vllm.multimodal import MultiModalKwargs
    from vllm.multimodal.inputs import PlaceholderRange
-    from vllm.v1.core.kv_cache_utils import BlockHashType


 class Request:
@@ -63,11 +62,6 @@ class Request:
        if self.mm_hashes:
            assert len(self.mm_inputs) == len(self.mm_hashes)

-        # Cache the computed kv block hashes of the request to avoid
-        # recomputing.
-        self._kv_block_hashes: List[BlockHashType] = []
-        self.kv_block_hashes = ConstantList(self._kv_block_hashes)
-
        # Read-only views
        # Prevent directly appending to these lists since
        # they should also be updated simultaneously.
@@ -124,13 +118,6 @@ class Request:
        num_tokens = self.mm_positions[input_id]["length"]
        return num_tokens

-    def set_kv_block_hashes(self, value: List["BlockHashType"]) -> None:
-        self._kv_block_hashes = value
-        self.kv_block_hashes = ConstantList(self._kv_block_hashes)
-
-    def append_kv_block_hashes(self, block_hash: "BlockHashType") -> None:
-        self._kv_block_hashes.append(block_hash)
-

 class RequestStatus(enum.IntEnum):
    """Status of a request."""