[Core] Use sha256 bytes instead of BlockHash to reduce GC overhead (#23673)

Signed-off-by: linzebing <linzebing1995@gmail.com>
This commit is contained in:
Zebing Lin
2025-09-09 00:34:37 -04:00
committed by GitHub
parent bba1042c6f
commit 82dfb12e52
15 changed files with 298 additions and 283 deletions

View File

@@ -24,7 +24,7 @@ logger = init_logger(__name__)
BlockSize = Literal[1, 8, 16, 32, 64, 128]
CacheDType = Literal["auto", "fp8", "fp8_e4m3", "fp8_e5m2", "fp8_inc"]
MambaDType = Literal["auto", "float32"]
PrefixCachingHashAlgo = Literal["builtin", "sha256", "sha256_cbor_64bit"]
PrefixCachingHashAlgo = Literal["sha256", "sha256_cbor"]
@config
@@ -63,17 +63,12 @@ class CacheConfig:
"""Sliding window size for the KV cache. This is primarily set in
`ModelConfig` and that value should be manually duplicated here."""
enable_prefix_caching: Optional[bool] = None
"""Whether to enable prefix caching. Disabled by default for V0. Enabled by
default for V1."""
prefix_caching_hash_algo: PrefixCachingHashAlgo = "builtin"
"""Whether to enable prefix caching. Enabled by default for V1."""
prefix_caching_hash_algo: PrefixCachingHashAlgo = "sha256"
"""Set the hash algorithm for prefix caching:\n
- "builtin" is Python's built-in hash.\n
- "sha256" is collision resistant but with certain overheads.
This option uses Pickle for object serialization before hashing.\n
- "sha256_cbor_64bit" provides a reproducible, cross-language compatible
hash. It serializes objects using canonical CBOR and hashes them with
SHA-256. The resulting hash consists of the lower 64 bits of the SHA-256
digest."""
- "sha256" uses Pickle for object serialization before hashing.\n
- "sha256_cbor" provides a reproducible, cross-language compatible hash. It
serializes objects using canonical CBOR and hashes them with SHA-256."""
cpu_offload_gb: float = 0
"""The space in GiB to offload to CPU, per GPU. Default is 0, which means
no offloading. Intuitively, this argument can be seen as a virtual way to