From 49b9ae32e94b902b87e3d2894f5ac4a5f8dd4abb Mon Sep 17 00:00:00 2001 From: emricksini-h Date: Sat, 28 Feb 2026 17:14:29 +0100 Subject: [PATCH] [Fix] Avoid sending image input to other PP ranks (#35405) Signed-off-by: emricksini-h Co-authored-by: Roger Wang --- vllm/v1/executor/ray_utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/vllm/v1/executor/ray_utils.py b/vllm/v1/executor/ray_utils.py index 67c5a58f7..1e707df7b 100644 --- a/vllm/v1/executor/ray_utils.py +++ b/vllm/v1/executor/ray_utils.py @@ -104,6 +104,18 @@ try: scheduler_output, intermediate_tensors ) if self._is_intermediate_tensors(output): + if ( + self.worker.model_runner.supports_mm_inputs + and get_pp_group().is_first_rank + ): + # Strip mm_features before Ray forwards it to the next PP Stage. + # PP Stage>0 only needs the intermediate tensors, + # not preprocessed multimodal data. + + # scheduled_new_reqs is a required field of SchedulerOutput, + # so accessing it directly will raise AttributeError if missing. + for req in scheduler_output.scheduled_new_reqs: + req.mm_features = [] return scheduler_output, grammar_output, output if isinstance(output, AsyncModelRunnerOutput):