[Refactor] Decouple TimingContext from InputProcessingContext (#35083)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-02-23 22:15:50 +08:00
committed by GitHub
parent 1e8438a89a
commit 392645454b
38 changed files with 419 additions and 649 deletions

View File

@@ -21,16 +21,17 @@ from vllm.multimodal.parse import (
ImageEmbeddingItems,
ImageProcessorItems,
MultiModalDataItems,
MultiModalUUIDItems,
)
from vllm.multimodal.processing import (
BaseDummyInputsBuilder,
BaseMultiModalProcessor,
BaseProcessingInfo,
ProcessorInputs,
PromptIndexTargets,
PromptInsertion,
PromptUpdate,
PromptUpdateDetails,
TimingContext,
)
from vllm.renderers import TokenizeParams
from vllm.sequence import IntermediateTensors
@@ -228,19 +229,10 @@ class PaliGemmaMultiModalProcessor(BaseMultiModalProcessor[PaliGemmaProcessingIn
def apply(
self,
prompt: str | list[int],
mm_items: MultiModalDataItems,
mm_uuid_items: MultiModalUUIDItems | None = None,
hf_processor_mm_kwargs: Mapping[str, object] | None = None,
tokenization_kwargs: Mapping[str, object] | None = None,
inputs: ProcessorInputs,
timing_ctx: TimingContext,
) -> MultiModalInputs:
mm_inputs = super().apply(
prompt,
mm_items,
mm_uuid_items,
hf_processor_mm_kwargs=hf_processor_mm_kwargs,
tokenization_kwargs=tokenization_kwargs,
)
mm_inputs = super().apply(inputs, timing_ctx)
prompt_token_ids = mm_inputs["prompt_token_ids"]
tokenizer = self.info.get_tokenizer()