[Feature] Add FIPS 140-3 compliant hash algorithm option for multimodal hashing (#32386)

Signed-off-by: Karan Bansal <karanb192@gmail.com>
2026-01-18 08:32:01 +05:30
parent 965765aef9
commit 3055232ba0
2 changed files with 46 additions and 3 deletions
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -73,6 +73,7 @@ if TYPE_CHECKING:
    VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25
    VLLM_VIDEO_LOADER_BACKEND: str = "opencv"
    VLLM_MEDIA_CONNECTOR: str = "http"
+    VLLM_MM_HASHER_ALGORITHM: str = "blake3"
    VLLM_TARGET_DEVICE: str = "cuda"
    VLLM_MAIN_CUDA_VERSION: str = "12.9"
    VLLM_FLOAT32_MATMUL_PRECISION: Literal["highest", "high", "medium"] = "highest"
@@ -806,6 +807,17 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # imported at runtime.
    # If a non-existing backend is used, an AssertionError will be thrown.
    "VLLM_MEDIA_CONNECTOR": lambda: os.getenv("VLLM_MEDIA_CONNECTOR", "http"),
+    # Hash algorithm for multimodal content hashing.
+    # - "blake3": Default, fast cryptographic hash (not FIPS 140-3 compliant)
+    # - "sha256": FIPS 140-3 compliant, widely supported
+    # - "sha512": FIPS 140-3 compliant, faster on 64-bit systems
+    # Use sha256 or sha512 for FIPS compliance in government/enterprise deployments
+    "VLLM_MM_HASHER_ALGORITHM": env_with_choices(
+        "VLLM_MM_HASHER_ALGORITHM",
+        "blake3",
+        ["blake3", "sha256", "sha512"],
+        case_sensitive=False,
+    ),
    # Path to the XLA persistent cache directory.
    # Only used for XLA devices such as TPUs.
    "VLLM_XLA_CACHE_PATH": lambda: os.path.expanduser(
--- a/vllm/multimodal/hasher.py
+++ b/vllm/multimodal/hasher.py
@@ -1,15 +1,17 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

+import functools
+import hashlib
 import pickle
 import uuid
-from collections.abc import Iterable
+from collections.abc import Callable, Iterable

 import numpy as np
 import torch
-from blake3 import blake3
 from PIL import Image

+import vllm.envs as envs
 from vllm.logger import init_logger

 from .media import MediaWithBytes
@@ -17,6 +19,34 @@ from .media import MediaWithBytes
 logger = init_logger(__name__)


+@functools.lru_cache(maxsize=3)
+def _get_hasher_factory(algorithm: str) -> Callable[[], "hashlib._Hash"]:
+    """
+    Get the hasher factory based on the configured algorithm.
+
+    Args:
+        algorithm: Hash algorithm name (blake3, sha256, or sha512)
+
+    Returns a callable that creates a new hasher instance.
+    Supports blake3 (default), sha256, and sha512 for FIPS compliance.
+
+    See: https://github.com/vllm-project/vllm/issues/18334
+    """
+    algorithm = algorithm.lower()
+
+    if algorithm == "blake3":
+        from blake3 import blake3
+
+        return blake3
+    elif algorithm == "sha256":
+        return hashlib.sha256
+    elif algorithm == "sha512":
+        return hashlib.sha512
+    else:
+        # This should never happen due to env_with_choices validation
+        raise ValueError(f"Unsupported hash algorithm: {algorithm}")
+
+
 class MultiModalHasher:
    @classmethod
    def serialize_item(cls, obj: object) -> Iterable[bytes | memoryview]:
@@ -114,7 +144,8 @@ class MultiModalHasher:

    @classmethod
    def hash_kwargs(cls, **kwargs: object) -> str:
-        hasher = blake3()
+        hasher_factory = _get_hasher_factory(envs.VLLM_MM_HASHER_ALGORITHM)
+        hasher = hasher_factory()

        for k, v in kwargs.items():
            for bytes_ in cls.iter_item_to_bytes(k, v):