[Bugfix] Loosen type check to avoid errors in V1 (#15021)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-03-18 20:54:40 +08:00
parent 64fc2193dc
commit ab656f2c2f
9 changed files with 28 additions and 37 deletions
--- a/vllm/model_executor/models/chameleon.py
+++ b/vllm/model_executor/models/chameleon.py
@@ -39,7 +39,7 @@ from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors

 from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
-from .utils import (is_pp_missing_parameter,
+from .utils import (flatten_bn, is_pp_missing_parameter,
                    make_empty_intermediate_tensors_factory, make_layers,
                    maybe_prefix, merge_multimodal_embeddings)

@@ -972,12 +972,11 @@ class ChameleonForConditionalGeneration(nn.Module, SupportsMultiModal,
        if pixel_values is None:
            return None

-        if not isinstance(pixel_values, torch.Tensor):
+        if not isinstance(pixel_values, (torch.Tensor, list)):
            raise ValueError("Incorrect type of pixel values. "
                             f"Got type: {type(pixel_values)}")

-        # Remove the N dimension until multiple images are supported.
-        pixel_values = pixel_values.squeeze(1)
+        pixel_values = flatten_bn(pixel_values, concat=True)

        return ChameleonImagePixelInputs(
            type="pixel_values",