[Prefix Cache] Add reproducible prefix-cache block hashing using SHA-256 + CBOR (64bit) (#20511)
Signed-off-by: Maroon Ayoub <maroon.ayoub@ibm.com>
This commit is contained in:
@@ -7,10 +7,10 @@ from typing import Optional
|
||||
|
||||
from vllm.distributed.kv_events import KVCacheEvent
|
||||
from vllm.logger import init_logger
|
||||
from vllm.utils import sha256
|
||||
from vllm.utils import sha256, sha256_cbor_64bit
|
||||
from vllm.v1.core.kv_cache_coordinator import get_kv_cache_coordinator
|
||||
from vllm.v1.core.kv_cache_utils import (BlockHash, KVCacheBlock,
|
||||
hash_request_tokens)
|
||||
hash_request_tokens, init_none_hash)
|
||||
from vllm.v1.kv_cache_interface import KVCacheConfig
|
||||
from vllm.v1.metrics.stats import PrefixCacheStats
|
||||
from vllm.v1.request import Request, RequestStatus
|
||||
@@ -79,7 +79,10 @@ class KVCacheManager:
|
||||
self.max_model_len = max_model_len
|
||||
|
||||
self.enable_caching = enable_caching
|
||||
self.caching_hash_fn = sha256 if caching_hash_algo == "sha256" else hash
|
||||
self.caching_hash_fn = (
|
||||
sha256_cbor_64bit if caching_hash_algo == "sha256_cbor_64bit" else
|
||||
sha256 if caching_hash_algo == "sha256" else hash)
|
||||
init_none_hash(self.caching_hash_fn)
|
||||
self.use_eagle = use_eagle
|
||||
self.log_stats = log_stats
|
||||
# FIXME: make prefix cache stats conditional on log_stats
|
||||
|
||||
@@ -10,7 +10,7 @@ from typing import Any, Callable, NamedTuple, Optional
|
||||
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.logger import init_logger
|
||||
from vllm.utils import GiB_bytes, cdiv, sha256
|
||||
from vllm.utils import GiB_bytes, cdiv, sha256_cbor_64bit
|
||||
from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
|
||||
KVCacheGroupSpec, KVCacheSpec,
|
||||
KVCacheTensor, SlidingWindowSpec)
|
||||
@@ -46,18 +46,30 @@ class BlockHashWithGroupId(NamedTuple):
|
||||
return self.block_hash.hash_value
|
||||
|
||||
|
||||
# The hash seed for the first block of the prefix block sequence.
|
||||
#
|
||||
# Even if the hash function is the builtin hash(), we use sha256 to generate
|
||||
# the initial hash to simplify the code. This is not performance critical
|
||||
# as it is done once per process.
|
||||
# The hash seed for the first block of any prefix block sequence.
|
||||
#
|
||||
# We use a random value to avoid hash collisions, or the PYTHONHASHSEED
|
||||
# environment variable, if set, so that processes can share the seed if needed.
|
||||
# This aligns with the behavior of Python's hash() function, which also uses
|
||||
# a random seed if PYTHONHASHSEED is not set.
|
||||
NONE_HASH = int.from_bytes(os.urandom(32), byteorder="big") if os.getenv(
|
||||
"PYTHONHASHSEED") is None else sha256(os.getenv("PYTHONHASHSEED"))
|
||||
#
|
||||
# The function `init_none_hash` initializes this variable globally.
|
||||
NONE_HASH: int
|
||||
|
||||
|
||||
def init_none_hash(hash_fn: Callable) -> None:
    """Initialize the module-level ``NONE_HASH`` seed value.

    Reads the ``PYTHONHASHSEED`` environment variable:

    * If it is set, ``NONE_HASH`` becomes ``hash_fn(<seed string>)``.
    * If it is not set, ``NONE_HASH`` becomes a random integer built from
      32 bytes of ``os.urandom`` (big-endian).

    A warning is logged when the variable is unset and ``hash_fn`` is
    ``sha256_cbor_64bit``, because the random seed makes the resulting
    block hashes non-reproducible.

    Args:
        hash_fn: Hash function applied to the ``PYTHONHASHSEED`` string
            when that variable is set.
    """
    global NONE_HASH

    hash_seed = os.getenv("PYTHONHASHSEED")
    if hash_seed is None and hash_fn is sha256_cbor_64bit:
        # Adjacent string literals are concatenated verbatim, so every
        # fragment must carry its own trailing space.
        logger.warning(
            "PYTHONHASHSEED is not set. This will lead to non-reproducible "
            "block-hashes when using sha256_cbor_64bit as the hash function. "
            "Consider setting PYTHONHASHSEED to a fixed value for "
            "reproducibility.")

    if hash_seed is None:
        # No shared seed available: fall back to a random per-process value.
        NONE_HASH = int.from_bytes(os.urandom(32), byteorder="big")
    else:
        NONE_HASH = hash_fn(hash_seed)
|
||||
|
||||
|
||||
class PrefixCachingMetrics:
|
||||
|
||||
Reference in New Issue
Block a user