[Misc] Clean up processing logic (#37541)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-03-19 21:30:20 +08:00
committed by GitHub
parent c63ca2b2e6
commit 9515c20868
9 changed files with 477 additions and 679 deletions

View File

@@ -7,7 +7,7 @@
# Copyright (c) 2024 NVIDIA
# Licensed under Apache 2.0 License [see LICENSE for details]
# --------------------------------------------------------
from collections.abc import Mapping, Sequence
from collections.abc import Mapping
import torch
import torch.nn as nn
@@ -16,7 +16,10 @@ from transformers import PretrainedConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import MultiModalDataDict, MultiModalKwargsItems
from vllm.multimodal.inputs import (
BatchedTensorInputs,
MultiModalDataDict,
)
from vllm.multimodal.parse import (
ImageEmbeddingItems,
ImageProcessorItems,
@@ -24,7 +27,6 @@ from vllm.multimodal.parse import (
)
from vllm.multimodal.processing import (
PromptReplacement,
PromptUpdate,
PromptUpdateDetails,
)
from vllm.transformers_utils.processors.internvl import InternVLImageProcessor
@@ -100,15 +102,12 @@ class NVLMDummyInputsBuilder(BaseInternVLDummyInputsBuilder[NVLMProcessingInfo])
class NVLMMultiModalProcessor(BaseInternVLMultiModalProcessor[NVLMProcessingInfo]):
def _get_prompt_updates(
def _get_prompt_repl_image(
self,
mm_items: MultiModalDataItems,
hf_processor_mm_kwargs: Mapping[str, object],
out_mm_kwargs: MultiModalKwargsItems,
) -> Sequence[PromptUpdate]:
hf_processor = self.info.get_hf_processor(**hf_processor_mm_kwargs)
out_mm_data = out_mm_kwargs.get_data()
hf_processor: NVLMProcessor,
out_mm_data: BatchedTensorInputs,
):
if "image_num_patches" in out_mm_data:
image_num_patches = out_mm_data["image_num_patches"]
assert isinstance(image_num_patches, torch.Tensor)
@@ -146,13 +145,11 @@ class NVLMMultiModalProcessor(BaseInternVLMultiModalProcessor[NVLMProcessingInfo
)
# See note in dummy data regarding why we have the extra newline
return [
PromptReplacement(
modality="image",
target="<image>\n",
replacement=get_replacement_nvlm,
)
]
return PromptReplacement(
modality="image",
target="<image>\n",
replacement=get_replacement_nvlm,
)
@MULTIMODAL_REGISTRY.register_processor(