[Feature] Add FIPS 140-3 compliant hash algorithm option for multimodal hashing (#32386)

Signed-off-by: Karan Bansal <karanb192@gmail.com>
This commit is contained in:
Karan Bansal
2026-01-18 08:32:01 +05:30
committed by GitHub
parent 965765aef9
commit 3055232ba0
2 changed files with 46 additions and 3 deletions

View File

@@ -73,6 +73,7 @@ if TYPE_CHECKING:
VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25
VLLM_VIDEO_LOADER_BACKEND: str = "opencv"
VLLM_MEDIA_CONNECTOR: str = "http"
VLLM_MM_HASHER_ALGORITHM: str = "blake3"
VLLM_TARGET_DEVICE: str = "cuda"
VLLM_MAIN_CUDA_VERSION: str = "12.9"
VLLM_FLOAT32_MATMUL_PRECISION: Literal["highest", "high", "medium"] = "highest"
@@ -806,6 +807,17 @@ environment_variables: dict[str, Callable[[], Any]] = {
# imported at runtime.
# If a non-existing backend is used, an AssertionError will be thrown.
"VLLM_MEDIA_CONNECTOR": lambda: os.getenv("VLLM_MEDIA_CONNECTOR", "http"),
# Hash algorithm for multimodal content hashing.
# - "blake3": Default, fast cryptographic hash (not FIPS 140-3 compliant)
# - "sha256": FIPS 140-3 compliant, widely supported
# - "sha512": FIPS 140-3 compliant, faster on 64-bit systems
# Use sha256 or sha512 for FIPS compliance in government/enterprise deployments
"VLLM_MM_HASHER_ALGORITHM": env_with_choices(
"VLLM_MM_HASHER_ALGORITHM",
"blake3",
["blake3", "sha256", "sha512"],
case_sensitive=False,
),
# Path to the XLA persistent cache directory.
# Only used for XLA devices such as TPUs.
"VLLM_XLA_CACHE_PATH": lambda: os.path.expanduser(

View File

@@ -1,15 +1,17 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import functools
import hashlib
import pickle
import uuid
from collections.abc import Iterable
from collections.abc import Callable, Iterable
import numpy as np
import torch
from blake3 import blake3
from PIL import Image
import vllm.envs as envs
from vllm.logger import init_logger
from .media import MediaWithBytes
@@ -17,6 +19,34 @@ from .media import MediaWithBytes
logger = init_logger(__name__)
@functools.lru_cache(maxsize=3)
def _get_hasher_factory(algorithm: str) -> Callable[[], "hashlib._Hash"]:
    """
    Resolve a hash algorithm name to a zero-argument hasher constructor.

    The name is lower-cased before matching, so lookups are
    case-insensitive. Results are memoized per input string by the
    surrounding ``lru_cache``.

    Args:
        algorithm: One of ``"blake3"``, ``"sha256"`` or ``"sha512"``.

    Returns:
        A callable that builds a fresh hasher each time it is invoked:
        ``blake3`` for ``"blake3"`` (the default choice), or the matching
        ``hashlib`` constructor for ``"sha256"`` / ``"sha512"`` (the
        options offered for FIPS compliance).

    Raises:
        ValueError: If the name is not one of the supported algorithms.

    See: https://github.com/vllm-project/vllm/issues/18334
    """
    algorithm = algorithm.lower()

    if algorithm == "blake3":
        # Function-scope import: this statement only runs when blake3 is the
        # selected algorithm.
        # NOTE(review): presumably so sha256/sha512 users never load blake3
        # -- confirm against the module-level imports.
        from blake3 import blake3

        return blake3

    # Standard-library hashers, keyed by their normalized name.
    hashlib_factories = {
        "sha256": hashlib.sha256,
        "sha512": hashlib.sha512,
    }
    if algorithm in hashlib_factories:
        return hashlib_factories[algorithm]

    # This should never happen due to env_with_choices validation
    raise ValueError(f"Unsupported hash algorithm: {algorithm}")
class MultiModalHasher:
@classmethod
def serialize_item(cls, obj: object) -> Iterable[bytes | memoryview]:
@@ -114,7 +144,8 @@ class MultiModalHasher:
@classmethod
def hash_kwargs(cls, **kwargs: object) -> str:
hasher = blake3()
hasher_factory = _get_hasher_factory(envs.VLLM_MM_HASHER_ALGORITHM)
hasher = hasher_factory()
for k, v in kwargs.items():
for bytes_ in cls.iter_item_to_bytes(k, v):