[V1] Move KV block hashes from Request to KVCacheManager (#12922)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon
2025-02-07 19:14:10 -08:00
committed by GitHub
parent b21f0f9d17
commit 3243158336
4 changed files with 35 additions and 31 deletions

View File

@@ -12,7 +12,6 @@ from vllm.v1.utils import ConstantList
if TYPE_CHECKING:
from vllm.multimodal import MultiModalKwargs
from vllm.multimodal.inputs import PlaceholderRange
from vllm.v1.core.kv_cache_utils import BlockHashType
class Request:
@@ -63,11 +62,6 @@ class Request:
if self.mm_hashes:
assert len(self.mm_inputs) == len(self.mm_hashes)
# Cache the computed kv block hashes of the request to avoid
# recomputing.
self._kv_block_hashes: List[BlockHashType] = []
self.kv_block_hashes = ConstantList(self._kv_block_hashes)
# Read-only views
# Prevent directly appending to these lists since
# they should also be updated simultaneously.
@@ -124,13 +118,6 @@ class Request:
num_tokens = self.mm_positions[input_id]["length"]
return num_tokens
def set_kv_block_hashes(self, value: List["BlockHashType"]) -> None:
    """Swap in ``value`` as the request's cached KV block hashes.

    The list is stored by reference (not copied) in ``_kv_block_hashes``
    and the public ``kv_block_hashes`` attribute is rebuilt as a
    ``ConstantList`` around that same list object.

    Args:
        value: The block hashes that replace the currently cached ones.
    """
    # Rebuild the public wrapper from the new list so that it does not
    # keep pointing at the list that was just replaced.
    hashes_view = ConstantList(value)
    self._kv_block_hashes = value
    self.kv_block_hashes = hashes_view
def append_kv_block_hashes(self, block_hash: "BlockHashType") -> None:
    """Add a single block hash to the end of the cached hash list.

    Args:
        block_hash: The hash to append to ``_kv_block_hashes``.
    """
    # Mutate the backing list in place; ``kv_block_hashes`` was built
    # from this same list object.
    cached_hashes = self._kv_block_hashes
    cached_hashes.append(block_hash)
class RequestStatus(enum.IntEnum):
"""Status of a request."""