[Multimodal] Always enable hashing mm data (#23308)
Signed-off-by: Roger Wang <hey@rogerw.io>
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
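Multimodal hashing is now unconditional: preprocess() no longer takes a return_mm_hashes flag derived from processor_return_mm_hashes or enable_prefix_caching, mm_hashes is read as an always-present key of the decoder inputs, and the None fallback around the multimodal input cache (along with the now-unused is_list_of import) is removed.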
@@ -17,7 +17,6 @@ from vllm.multimodal.utils import argsort_mm_positions
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingParams
 from vllm.transformers_utils.tokenizer_group import TokenizerGroup
-from vllm.utils import is_list_of
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.engine.mm_input_cache import MultiModalInputCacheClient
 from vllm.v1.structured_output.backend_guidance import (
@@ -253,13 +252,10 @@ class Processor:
         # 1. Tokenize text prompt, with LoRA request if one exists.
         # 2. For multimodal models with a merged preprocessor, preprocess
         #    multimodal data and expand prompt token ids accordingly.
-        return_mm_hashes = (self.model_config.processor_return_mm_hashes
-                            or bool(self.cache_config.enable_prefix_caching))
         processed_inputs: ProcessorInputs = self.input_preprocessor.preprocess(
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             lora_request=lora_request,
-            return_mm_hashes=return_mm_hashes,
         )
         from vllm.platforms import current_platform
         current_platform.validate_request(
@@ -302,7 +298,7 @@ class Processor:
         if decoder_inputs["type"] == "multimodal":
             decoder_mm_inputs = decoder_inputs["mm_kwargs"]
             decoder_mm_positions = decoder_inputs["mm_placeholders"]
-            decoder_mm_hashes = decoder_inputs.get("mm_hashes")
+            decoder_mm_hashes = decoder_inputs["mm_hashes"]
 
             # Merge and flatten multimodal placeholders, hashes and inputs
             # from dictionaries to lists, and sort them by each item's position
@@ -317,19 +313,15 @@ class Processor:
                 decoder_mm_positions[modality][idx]
                 for modality, idx in sorted_mm_idxs
             ]
-            sorted_mm_hashes = None if decoder_mm_hashes is None else [
+            sorted_mm_hashes = [
                 decoder_mm_hashes[modality][idx]
                 for modality, idx in sorted_mm_idxs
             ]
 
-            if sorted_mm_hashes is not None:
-                sorted_mm_inputs = self.mm_input_cache_client.get_and_update(
-                    orig_sorted_mm_inputs,
-                    sorted_mm_hashes,
-                )
-            else:
-                assert is_list_of(orig_sorted_mm_inputs, MultiModalKwargsItem)
-                sorted_mm_inputs = orig_sorted_mm_inputs
+            sorted_mm_inputs = self.mm_input_cache_client.get_and_update(
+                orig_sorted_mm_inputs,
+                sorted_mm_hashes,
+            )
 
         return decoder_inputs.get("prompt"), EngineCoreRequest(
             request_id=request_id,
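For context, the last hunk relies on every multimodal item now carrying a content hash, so the input cache can be keyed unconditionally. Below is a minimal sketch of that hash-keyed caching pattern; MMInputCache, its capacity parameter, and the generic payload type are illustrative assumptions, not vLLM's actual MultiModalInputCacheClient implementation.

from collections import OrderedDict
from typing import Any

class MMInputCache:
    """Toy LRU cache keyed by per-item content hashes (illustrative only)."""

    def __init__(self, capacity: int = 256) -> None:
        self._cache: "OrderedDict[str, Any]" = OrderedDict()
        self._capacity = capacity

    def get_and_update(self, items: list[Any], hashes: list[str]) -> list[Any]:
        # One hash per item: after this change the caller always supplies both.
        assert len(items) == len(hashes)
        out = []
        for item, key in zip(items, hashes):
            if key in self._cache:
                # Cache hit: reuse the stored inputs and mark them recently used.
                self._cache.move_to_end(key)
                out.append(self._cache[key])
            else:
                # Cache miss: store the new inputs, evicting the oldest entry.
                self._cache[key] = item
                if len(self._cache) > self._capacity:
                    self._cache.popitem(last=False)
                out.append(item)
        return out

Calling get_and_update() twice with the same hashes returns the cached entries on the second call, which mirrors why the None branch could be deleted: with hashes guaranteed present, the cache path is the only path.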