[V1] Move KV block hashes from Request to KVCacheManager (#12922)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
@@ -12,7 +12,6 @@ from vllm.v1.utils import ConstantList
|
||||
if TYPE_CHECKING:
|
||||
from vllm.multimodal import MultiModalKwargs
|
||||
from vllm.multimodal.inputs import PlaceholderRange
|
||||
from vllm.v1.core.kv_cache_utils import BlockHashType
|
||||
|
||||
|
||||
class Request:
|
||||
@@ -63,11 +62,6 @@ class Request:
|
||||
if self.mm_hashes:
|
||||
assert len(self.mm_inputs) == len(self.mm_hashes)
|
||||
|
||||
# Cache the computed kv block hashes of the request to avoid
|
||||
# recomputing.
|
||||
self._kv_block_hashes: List[BlockHashType] = []
|
||||
self.kv_block_hashes = ConstantList(self._kv_block_hashes)
|
||||
|
||||
# Read-only views
|
||||
# Prevent directly appending to these lists since
|
||||
# they should also be updated simultaneously.
|
||||
@@ -124,13 +118,6 @@ class Request:
|
||||
num_tokens = self.mm_positions[input_id]["length"]
|
||||
return num_tokens
|
||||
|
||||
def set_kv_block_hashes(self, value: List["BlockHashType"]) -> None:
|
||||
self._kv_block_hashes = value
|
||||
self.kv_block_hashes = ConstantList(self._kv_block_hashes)
|
||||
|
||||
def append_kv_block_hashes(self, block_hash: "BlockHashType") -> None:
|
||||
self._kv_block_hashes.append(block_hash)
|
||||
|
||||
|
||||
class RequestStatus(enum.IntEnum):
|
||||
"""Status of a request."""
|
||||
|
||||
Reference in New Issue
Block a user