[Multimodal] Always enable hashing mm data (#23308)

Signed-off-by: Roger Wang <hey@rogerw.io>
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
Author: Roger Wang
Date: 2025-08-21 07:23:28 -07:00
Committed by: GitHub
Parent: f8daddcc4c
Commit: 79f05e4436
15 changed files with 94 additions and 148 deletions
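
In short: the processor now hashes multimodal data unconditionally. The `return_mm_hashes` gate, previously enabled only by `processor_return_mm_hashes` or prefix caching, is removed from the preprocess call; `mm_hashes` is always present in the processed inputs; and the `None` fallback path, along with the now-unused `is_list_of` import, is deleted.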


@@ -17,7 +17,6 @@ from vllm.multimodal.utils import argsort_mm_positions
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import SamplingParams
 from vllm.transformers_utils.tokenizer_group import TokenizerGroup
-from vllm.utils import is_list_of
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.engine.mm_input_cache import MultiModalInputCacheClient
 from vllm.v1.structured_output.backend_guidance import (
@@ -253,13 +252,10 @@ class Processor:
         # 1. Tokenize text prompt, with LoRA request if one exists.
         # 2. For multimodal models with a merged preprocessor, preprocess
         #    multimodal data and expand prompt token ids accordingly.
-        return_mm_hashes = (self.model_config.processor_return_mm_hashes
-                            or bool(self.cache_config.enable_prefix_caching))
         processed_inputs: ProcessorInputs = self.input_preprocessor.preprocess(
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             lora_request=lora_request,
-            return_mm_hashes=return_mm_hashes,
         )
         from vllm.platforms import current_platform
         current_platform.validate_request(
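
To see what the removed gate did, here is a minimal, self-contained sketch; preprocess_old, preprocess_new, and the string "hashes" are hypothetical stand-ins for illustration, not vLLM's real API:

def preprocess_old(data, return_mm_hashes):
    # Old behavior: hashing ran only when the caller (or prefix caching)
    # asked for it, so consumers had to handle mm_hashes being None.
    hashes = [f"hash({d})" for d in data] if return_mm_hashes else None
    return {"mm_kwargs": data, "mm_hashes": hashes}

def preprocess_new(data):
    # New behavior: hashing always runs, so mm_hashes is never None and
    # downstream code can key caches on it without a fallback branch.
    return {"mm_kwargs": data, "mm_hashes": [f"hash({d})" for d in data]}

old = preprocess_old(["img0", "img1"], return_mm_hashes=False)
new = preprocess_new(["img0", "img1"])
print(old["mm_hashes"])  # None -> forced the is_list_of fallback removed below
print(new["mm_hashes"])  # ['hash(img0)', 'hash(img1)'] -> single code path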
@@ -302,7 +298,7 @@ class Processor:
         if decoder_inputs["type"] == "multimodal":
             decoder_mm_inputs = decoder_inputs["mm_kwargs"]
             decoder_mm_positions = decoder_inputs["mm_placeholders"]
-            decoder_mm_hashes = decoder_inputs.get("mm_hashes")
+            decoder_mm_hashes = decoder_inputs["mm_hashes"]

             # Merge and flatten multimodal placeholders, hashes and inputs
             # from dictionaries to lists, and sort them by each item's position
@@ -317,19 +313,15 @@ class Processor:
                 decoder_mm_positions[modality][idx]
                 for modality, idx in sorted_mm_idxs
             ]
-            sorted_mm_hashes = None if decoder_mm_hashes is None else [
+            sorted_mm_hashes = [
                 decoder_mm_hashes[modality][idx]
                 for modality, idx in sorted_mm_idxs
             ]

-            if sorted_mm_hashes is not None:
-                sorted_mm_inputs = self.mm_input_cache_client.get_and_update(
-                    orig_sorted_mm_inputs,
-                    sorted_mm_hashes,
-                )
-            else:
-                assert is_list_of(orig_sorted_mm_inputs, MultiModalKwargsItem)
-                sorted_mm_inputs = orig_sorted_mm_inputs
+            sorted_mm_inputs = self.mm_input_cache_client.get_and_update(
+                orig_sorted_mm_inputs,
+                sorted_mm_hashes,
+            )

         return decoder_inputs.get("prompt"), EngineCoreRequest(
             request_id=request_id,
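
Putting the last two hunks together, here is a runnable toy version of the flatten-sort-dedup flow that now executes unconditionally. The data structures and ToyMMInputCache are illustrative stand-ins: the real cache is MultiModalInputCacheClient, and real placeholder positions are richer objects, not bare integer offsets.

from collections import OrderedDict

def argsort_mm_positions(positions):
    # Flatten {modality: [offset, ...]} into (modality, idx) pairs ordered
    # by each item's offset in the prompt, mirroring in spirit what
    # vllm.multimodal.utils.argsort_mm_positions does.
    pairs = [(m, i) for m, offs in positions.items() for i in range(len(offs))]
    return sorted(pairs, key=lambda p: positions[p[0]][p[1]])

class ToyMMInputCache:
    # Hash-keyed cache standing in for MultiModalInputCacheClient.
    def __init__(self):
        self._store = OrderedDict()

    def get_and_update(self, items, hashes):
        out = []
        for item, h in zip(items, hashes):
            if h in self._store:   # hit: reuse previously processed inputs
                out.append(self._store[h])
            else:                  # miss: remember the item and pass it through
                self._store[h] = item
                out.append(item)
        return out

# Per-modality dicts, shaped like the decoder_mm_* values above.
mm_positions = {"image": [10, 2], "audio": [5]}
mm_inputs = {"image": ["img_kwargs_0", "img_kwargs_1"], "audio": ["aud_kwargs_0"]}
mm_hashes = {"image": ["h_img0", "h_img1"], "audio": ["h_aud0"]}

sorted_idxs = argsort_mm_positions(mm_positions)  # [('image', 1), ('audio', 0), ('image', 0)]
sorted_inputs = [mm_inputs[m][i] for m, i in sorted_idxs]
sorted_hashes = [mm_hashes[m][i] for m, i in sorted_idxs]

# Hashes are always available now, so no None branch before the cache call.
cache = ToyMMInputCache()
print(cache.get_and_update(sorted_inputs, sorted_hashes))
# ['img_kwargs_1', 'aud_kwargs_0', 'img_kwargs_0']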