[Bugfix] Fix PaddleOCR crash on some image shapes (#36959)

Signed-off-by: wangzhengtao <wangzhengtao@msh.team> Signed-off-by: bigmoyan <moyan_work@foxmail.com> Co-authored-by: wangzhengtao <wangzhengtao@msh.team> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2026-03-13 21:46:55 +08:00
parent d5af196c18
commit 4508532fbd
1 changed files with 6 additions and 1 deletions
--- a/vllm/model_executor/models/paddleocr_vl.py
+++ b/vllm/model_executor/models/paddleocr_vl.py
@@ -25,6 +25,7 @@ import torch.nn as nn
 from einops import rearrange
 from transformers import BaseImageProcessor, BatchFeature, PretrainedConfig
 from transformers.activations import GELUActivation
+from transformers.image_utils import ChannelDimension
 from transformers.modeling_outputs import (
    BaseModelOutputWithPooling,
 )
@@ -249,8 +250,12 @@ class PaddleOCRVLMultiModalProcessor(
        tok_kwargs: Mapping[str, object],
    ) -> BatchFeature:
        if mm_data:
+            final_mm_kwargs = dict(mm_kwargs or {})
+            final_mm_kwargs.setdefault("images_kwargs", {})
+            # vLLM passes images as PIL.Image, so always use channels-last
+            final_mm_kwargs["input_data_format"] = ChannelDimension.LAST
            processed_outputs = self.info.ctx.call_hf_processor(
-                self.info.get_hf_processor(**mm_kwargs),
+                self.info.get_hf_processor(**final_mm_kwargs),
                dict(text=prompt, **mm_data),
                dict(**mm_kwargs, **tok_kwargs),
            )