[Core] Simplify and unify mm uuid handling & auto-generated mm hash overrides processing. (#24271)
Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
This commit is contained in:
@@ -12,7 +12,7 @@ from vllm.inputs.preprocess import InputPreprocessor
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
|
||||
from vllm.multimodal.cache import processor_cache_from_config
|
||||
from vllm.multimodal.inputs import MultiModalFeatureSpec
|
||||
from vllm.multimodal.inputs import MultiModalFeatureSpec, MultiModalUUIDDict
|
||||
from vllm.multimodal.processing import EncDecMultiModalProcessor
|
||||
from vllm.multimodal.utils import argsort_mm_positions
|
||||
from vllm.pooling_params import PoolingParams
|
||||
@@ -276,11 +276,11 @@ class Processor:
|
||||
# Remember that this backend was set automatically
|
||||
params.guided_decoding.backend_was_auto = True
|
||||
|
||||
def _maybe_build_mm_hash_overrides(
|
||||
def _maybe_build_mm_uuids(
|
||||
self,
|
||||
request_id: str,
|
||||
prompt: PromptType,
|
||||
) -> Optional[dict[str, list[str]]]:
|
||||
) -> Optional[MultiModalUUIDDict]:
|
||||
"""Build per-item multimodal hash overrides when enabled. In this case,
|
||||
multimodal data items are identified by their request id, modality and
|
||||
index rather than their content.
|
||||
@@ -303,13 +303,13 @@ class Processor:
|
||||
if not mm_data:
|
||||
return None
|
||||
|
||||
overrides: dict[str, list[str]] = {}
|
||||
mm_uuids: MultiModalUUIDDict = {}
|
||||
for modality, data in mm_data.items():
|
||||
n = len(data) if isinstance(data, list) else 1
|
||||
overrides[modality] = [
|
||||
mm_uuids[modality] = [
|
||||
f"{request_id}-{modality}-{i}" for i in range(n)
|
||||
]
|
||||
return overrides
|
||||
return mm_uuids
|
||||
|
||||
def process_inputs(
|
||||
self,
|
||||
@@ -351,16 +351,15 @@ class Processor:
|
||||
if (self.model_config.multimodal_config and
|
||||
self.model_config.multimodal_config.mm_processor_cache_gb == 0
|
||||
and not self.cache_config.enable_prefix_caching):
|
||||
mm_hash_overrides = self._maybe_build_mm_hash_overrides(
|
||||
request_id, prompt)
|
||||
mm_uuids = self._maybe_build_mm_uuids(request_id, prompt)
|
||||
else:
|
||||
# Otherwise, use user-provided uuids as multimodal hash overrides
|
||||
# if provided.
|
||||
self._validate_multi_modal_uuids(prompt)
|
||||
if isinstance(prompt, dict):
|
||||
mm_hash_overrides = prompt.get("multi_modal_uuids")
|
||||
mm_uuids = prompt.get("multi_modal_uuids")
|
||||
else:
|
||||
mm_hash_overrides = None
|
||||
mm_uuids = None
|
||||
|
||||
# Process inputs, which includes:
|
||||
# 1. Tokenize text prompt, with LoRA request if one exists.
|
||||
@@ -370,7 +369,7 @@ class Processor:
|
||||
prompt,
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
lora_request=lora_request,
|
||||
mm_hash_overrides=mm_hash_overrides,
|
||||
mm_uuids=mm_uuids,
|
||||
)
|
||||
from vllm.platforms import current_platform
|
||||
current_platform.validate_request(
|
||||
|
||||
Reference in New Issue
Block a user