diff --git a/vllm/v1/executor/ray_utils.py b/vllm/v1/executor/ray_utils.py index 67c5a58f7..1e707df7b 100644 --- a/vllm/v1/executor/ray_utils.py +++ b/vllm/v1/executor/ray_utils.py @@ -104,6 +104,18 @@ try: scheduler_output, intermediate_tensors ) if self._is_intermediate_tensors(output): + if ( + self.worker.model_runner.supports_mm_inputs + and get_pp_group().is_first_rank + ): + # Strip mm_features before Ray forwards it to the next PP Stage. + # PP Stage>0 only needs the intermediate tensors, + # not preprocessed multimodal data. + + # scheduled_new_reqs is a required field of SchedulerOutput, + # so accessing it directly will raise AttributeError if missing. + for req in scheduler_output.scheduled_new_reqs: + req.mm_features = [] return scheduler_output, grammar_output, output if isinstance(output, AsyncModelRunnerOutput):