[Prefix Cache] Add reproducible prefix-cache block hashing using SHA-256 + CBOR (64bit) (#20511)

Signed-off-by: Maroon Ayoub <maroon.ayoub@ibm.com>
This commit is contained in:
Maroon Ayoub
2025-07-14 05:45:31 +03:00
committed by GitHub
parent 8632e831ba
commit 66f6fbd393
8 changed files with 88 additions and 28 deletions

View File

@@ -7,10 +7,10 @@ from typing import Optional
from vllm.distributed.kv_events import KVCacheEvent
from vllm.logger import init_logger
from vllm.utils import sha256
from vllm.utils import sha256, sha256_cbor_64bit
from vllm.v1.core.kv_cache_coordinator import get_kv_cache_coordinator
from vllm.v1.core.kv_cache_utils import (BlockHash, KVCacheBlock,
hash_request_tokens)
hash_request_tokens, init_none_hash)
from vllm.v1.kv_cache_interface import KVCacheConfig
from vllm.v1.metrics.stats import PrefixCacheStats
from vllm.v1.request import Request, RequestStatus
@@ -79,7 +79,10 @@ class KVCacheManager:
self.max_model_len = max_model_len
self.enable_caching = enable_caching
self.caching_hash_fn = sha256 if caching_hash_algo == "sha256" else hash
self.caching_hash_fn = (
sha256_cbor_64bit if caching_hash_algo == "sha256_cbor_64bit" else
sha256 if caching_hash_algo == "sha256" else hash)
init_none_hash(self.caching_hash_fn)
self.use_eagle = use_eagle
self.log_stats = log_stats
# FIXME: make prefix cache stats conditional on log_stats

View File

@@ -10,7 +10,7 @@ from typing import Any, Callable, NamedTuple, Optional
from vllm.config import VllmConfig
from vllm.logger import init_logger
from vllm.utils import GiB_bytes, cdiv, sha256
from vllm.utils import GiB_bytes, cdiv, sha256_cbor_64bit
from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
KVCacheGroupSpec, KVCacheSpec,
KVCacheTensor, SlidingWindowSpec)
@@ -46,18 +46,30 @@ class BlockHashWithGroupId(NamedTuple):
return self.block_hash.hash_value
# The hash seed for the first block of the prefix block sequence.
#
# Even if the hash function is the builtin hash(), we use sha256 to generate
# the initial hash to simplify the code. This is not performance critical
# as it is done once per process.
# The hash seed for the first block of any prefix block sequence.
#
# We use a random value to avoid hash collisions, or the PYTHONHASHSEED
# environment variable if set, so that processes can share the seed if needed.
# This aligns with the behavior of Python's hash() function, which also uses
# a random seed if PYTHONHASHSEED is not set.
# Evaluated once at import time: 32 random bytes from os.urandom interpreted
# as a big-endian integer when PYTHONHASHSEED is unset; otherwise the result
# of sha256() applied to the PYTHONHASHSEED string.
# NOTE(review): presumably sha256() here is deterministic across processes, so
# that a shared PYTHONHASHSEED yields a shared seed -- confirm against
# vllm.utils.sha256.
NONE_HASH = int.from_bytes(os.urandom(32), byteorder="big") if os.getenv(
"PYTHONHASHSEED") is None else sha256(os.getenv("PYTHONHASHSEED"))
#
# The function `init_none_hash` initializes this variable globally.
NONE_HASH: int
def init_none_hash(hash_fn: Callable) -> None:
    """Initialize the module-level ``NONE_HASH`` seed.

    ``NONE_HASH`` is the hash seed for the first block of any prefix block
    sequence. It is derived as follows:

    * If the ``PYTHONHASHSEED`` environment variable is set, ``NONE_HASH`` is
      ``hash_fn`` applied to its string value.
    * Otherwise ``NONE_HASH`` is 32 bytes from ``os.urandom`` read as a
      big-endian integer, so it differs on every call.

    A warning is logged when ``PYTHONHASHSEED`` is unset and ``hash_fn`` is
    ``sha256_cbor_64bit``, because the random seed then makes block hashes
    non-reproducible.

    Args:
        hash_fn: Hash function used to derive the seed from the
            ``PYTHONHASHSEED`` string.
    """
    global NONE_HASH

    hash_seed = os.getenv("PYTHONHASHSEED")
    if hash_seed is None and hash_fn is sha256_cbor_64bit:
        # Adjacent string literals are concatenated verbatim, so every
        # fragment but the last must end with a space.
        logger.warning(
            "PYTHONHASHSEED is not set. This will lead to non-reproducible "
            "block-hashes when using sha256_cbor_64bit as the hash function. "
            "Consider setting PYTHONHASHSEED to a fixed value for "
            "reproducibility.")

    if hash_seed is None:
        NONE_HASH = int.from_bytes(os.urandom(32), byteorder="big")
    else:
        NONE_HASH = hash_fn(hash_seed)
class PrefixCachingMetrics: