[Core] Use sha256 bytes instead of BlockHash to reduce GC overhead (#23673)

Signed-off-by: linzebing <linzebing1995@gmail.com>
2025-09-09 00:34:37 -04:00
parent bba1042c6f
commit 82dfb12e52
15 changed files with 298 additions and 283 deletions
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -171,6 +171,7 @@ if TYPE_CHECKING:
    VLLM_GPT_OSS_USE_CONTAINER_TOOL: bool = False
    VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False
    VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
+    VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True


 def get_default_cache_root():
@@ -1215,6 +1216,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # Add optional custom scopes for profiling, disable to avoid overheads
    "VLLM_CUSTOM_SCOPES_FOR_PROFILING":
    lambda: bool(int(os.getenv("VLLM_CUSTOM_SCOPES_FOR_PROFILING", "0"))),
+
+    # Represent block hashes in KV cache events as 64-bit integers instead of
+    # raw bytes. Defaults to True for backward compatibility.
+    "VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES":
+    lambda: bool(int(os.getenv("VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES", "1"))),
 }

 # --8<-- [end:env-vars-definition]