[Feature] Add FIPS 140-3 compliant hash algorithm option for multimodal hashing (#32386)
Signed-off-by: Karan Bansal <karanb192@gmail.com>
This commit is contained in:
12
vllm/envs.py
12
vllm/envs.py
@@ -73,6 +73,7 @@ if TYPE_CHECKING:
|
||||
VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25
|
||||
VLLM_VIDEO_LOADER_BACKEND: str = "opencv"
|
||||
VLLM_MEDIA_CONNECTOR: str = "http"
|
||||
VLLM_MM_HASHER_ALGORITHM: str = "blake3"
|
||||
VLLM_TARGET_DEVICE: str = "cuda"
|
||||
VLLM_MAIN_CUDA_VERSION: str = "12.9"
|
||||
VLLM_FLOAT32_MATMUL_PRECISION: Literal["highest", "high", "medium"] = "highest"
|
||||
@@ -806,6 +807,17 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
# imported at runtime.
|
||||
# If a non-existing backend is used, an AssertionError will be thrown.
|
||||
"VLLM_MEDIA_CONNECTOR": lambda: os.getenv("VLLM_MEDIA_CONNECTOR", "http"),
|
||||
# Hash algorithm for multimodal content hashing.
|
||||
# - "blake3": Default, fast cryptographic hash (not FIPS 140-3 compliant)
|
||||
# - "sha256": FIPS 140-3 compliant, widely supported
|
||||
# - "sha512": FIPS 140-3 compliant, faster on 64-bit systems
|
||||
# Use sha256 or sha512 for FIPS compliance in government/enterprise deployments
|
||||
"VLLM_MM_HASHER_ALGORITHM": env_with_choices(
|
||||
"VLLM_MM_HASHER_ALGORITHM",
|
||||
"blake3",
|
||||
["blake3", "sha256", "sha512"],
|
||||
case_sensitive=False,
|
||||
),
|
||||
# Path to the XLA persistent cache directory.
|
||||
# Only used for XLA devices such as TPUs.
|
||||
"VLLM_XLA_CACHE_PATH": lambda: os.path.expanduser(
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import functools
|
||||
import hashlib
|
||||
import pickle
|
||||
import uuid
|
||||
from collections.abc import Iterable
|
||||
from collections.abc import Callable, Iterable
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from blake3 import blake3
|
||||
from PIL import Image
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.logger import init_logger
|
||||
|
||||
from .media import MediaWithBytes
|
||||
@@ -17,6 +19,34 @@ from .media import MediaWithBytes
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=3)
def _get_hasher_factory(algorithm: str) -> Callable[[], "hashlib._Hash"]:
    """
    Resolve a hash algorithm name to a zero-argument hasher constructor.

    The name is lower-cased before it is matched, so lookups are
    case-insensitive. Supported names are blake3 (the default), and
    sha256 / sha512, which are offered for FIPS compliance.

    Args:
        algorithm: Hash algorithm name (blake3, sha256, or sha512)

    Returns:
        A callable that creates a new hasher instance each time it is called.

    Raises:
        ValueError: If the name is not one of the supported algorithms.

    See: https://github.com/vllm-project/vllm/issues/18334
    """
    name = algorithm.lower()

    # blake3 is imported inside this branch, so the package is only loaded
    # when this algorithm is actually requested.
    if name == "blake3":
        from blake3 import blake3

        return blake3

    # The remaining supported algorithms are plain hashlib constructors.
    stdlib_factories = {
        "sha256": hashlib.sha256,
        "sha512": hashlib.sha512,
    }
    if name in stdlib_factories:
        return stdlib_factories[name]

    # Not expected to be reached when the name comes from the
    # env_with_choices-validated setting.
    raise ValueError(f"Unsupported hash algorithm: {name}")
|
||||
|
||||
|
||||
class MultiModalHasher:
|
||||
@classmethod
|
||||
def serialize_item(cls, obj: object) -> Iterable[bytes | memoryview]:
|
||||
@@ -114,7 +144,8 @@ class MultiModalHasher:
|
||||
|
||||
@classmethod
|
||||
def hash_kwargs(cls, **kwargs: object) -> str:
|
||||
hasher = blake3()
|
||||
hasher_factory = _get_hasher_factory(envs.VLLM_MM_HASHER_ALGORITHM)
|
||||
hasher = hasher_factory()
|
||||
|
||||
for k, v in kwargs.items():
|
||||
for bytes_ in cls.iter_item_to_bytes(k, v):
|
||||
|
||||
Reference in New Issue
Block a user