[Core] Use sha256 bytes instead of BlockHash to reduce GC overhead (#23673)

Signed-off-by: linzebing <linzebing1995@gmail.com>
This commit is contained in:
Zebing Lin
2025-09-09 00:34:37 -04:00
committed by GitHub
parent bba1042c6f
commit 82dfb12e52
15 changed files with 298 additions and 283 deletions

View File

@@ -171,6 +171,7 @@ if TYPE_CHECKING:
VLLM_GPT_OSS_USE_CONTAINER_TOOL: bool = False
VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False
VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True
def get_default_cache_root():
@@ -1215,6 +1216,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
# Add optional custom scopes for profiling, disable to avoid overheads
"VLLM_CUSTOM_SCOPES_FOR_PROFILING":
lambda: bool(int(os.getenv("VLLM_CUSTOM_SCOPES_FOR_PROFILING", "0"))),
# Represent block hashes in KV cache events as 64-bit integers instead of
# raw bytes. Defaults to True for backward compatibility.
"VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES":
lambda: bool(int(os.getenv("VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES", "1"))),
}
# --8<-- [end:env-vars-definition]