[Refactor] Decouple TimingContext from InputProcessingContext (#35083)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -30,15 +30,16 @@ from vllm.multimodal.parse import (
|
||||
ImageProcessorItems,
|
||||
ImageSize,
|
||||
MultiModalDataItems,
|
||||
MultiModalUUIDItems,
|
||||
)
|
||||
from vllm.multimodal.processing import BaseDummyInputsBuilder
|
||||
from vllm.multimodal.processing.processor import (
|
||||
BaseMultiModalProcessor,
|
||||
BaseProcessingInfo,
|
||||
MultiModalProcessingInfo,
|
||||
ProcessorInputs,
|
||||
PromptReplacement,
|
||||
PromptUpdate,
|
||||
TimingContext,
|
||||
)
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.tokenizers import cached_tokenizer_from_config
|
||||
@@ -310,32 +311,17 @@ class DeepseekVL2MultiModalProcessor(
|
||||
|
||||
def _cached_apply_hf_processor(
|
||||
self,
|
||||
prompt: str | list[int],
|
||||
mm_data_items: MultiModalDataItems,
|
||||
mm_uuid_items: MultiModalUUIDItems | None,
|
||||
hf_processor_mm_kwargs: Mapping[str, object],
|
||||
tokenization_kwargs: Mapping[str, object],
|
||||
inputs: ProcessorInputs,
|
||||
timing_ctx: TimingContext,
|
||||
) -> tuple[list[int], MultiModalProcessingInfo, bool]:
|
||||
# The processor logic is different for len(images) <= 2 vs > 2
|
||||
# Since the processing cache assumes that the processor output is
|
||||
# invariant of how many images are passed per prompt, we only
|
||||
# perform caching for the most common case
|
||||
if mm_data_items.get_count("image", strict=False) > 2:
|
||||
return self._apply_hf_processor(
|
||||
prompt=prompt,
|
||||
mm_data_items=mm_data_items,
|
||||
mm_uuid_items=mm_uuid_items,
|
||||
hf_processor_mm_kwargs=hf_processor_mm_kwargs,
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
)
|
||||
if inputs.mm_data_items.get_count("image", strict=False) > 2:
|
||||
return self._apply_hf_processor(inputs, timing_ctx)
|
||||
|
||||
return super()._cached_apply_hf_processor(
|
||||
prompt=prompt,
|
||||
mm_data_items=mm_data_items,
|
||||
mm_uuid_items=mm_uuid_items,
|
||||
hf_processor_mm_kwargs=hf_processor_mm_kwargs,
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
)
|
||||
return super()._cached_apply_hf_processor(inputs, timing_ctx)
|
||||
|
||||
|
||||
@MULTIMODAL_REGISTRY.register_processor(
|
||||
|
||||
Reference in New Issue
Block a user