From 49b9ae32e94b902b87e3d2894f5ac4a5f8dd4abb Mon Sep 17 00:00:00 2001
From: emricksini-h <emrick.birivoutin@hcompany.ai>
Date: Sat, 28 Feb 2026 17:14:29 +0100
Subject: [PATCH] [Fix] Avoid sending image input to other PP ranks (#35405)

Signed-off-by: emricksini-h <emrick.birivoutin@hcompany.ai>
Co-authored-by: Roger Wang <hey@rogerw.io>
---
 vllm/v1/executor/ray_utils.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/vllm/v1/executor/ray_utils.py b/vllm/v1/executor/ray_utils.py
index 67c5a58f7..1e707df7b 100644
--- a/vllm/v1/executor/ray_utils.py
+++ b/vllm/v1/executor/ray_utils.py
@@ -104,6 +104,18 @@ try:
                 scheduler_output, intermediate_tensors
             )
             if self._is_intermediate_tensors(output):
+                if (
+                    self.worker.model_runner.supports_mm_inputs
+                    and get_pp_group().is_first_rank
+                ):
+                    # Strip mm_features before Ray forwards scheduler_output to the
+                    # next PP stage. PP stages > 0 only need the intermediate
+                    # tensors, not the preprocessed multimodal data.
+
+                    # scheduled_new_reqs is a required field of SchedulerOutput, so
+                    # it is read directly; if absent, AttributeError is raised.
+                    for req in scheduler_output.scheduled_new_reqs:
+                        req.mm_features = []
                 return scheduler_output, grammar_output, output
 
             if isinstance(output, AsyncModelRunnerOutput):