[Feature] Support Pipeline Parallism in torchrun SPMD offline inference for V1 (#17827)

Signed-off-by: Lucia Fang <fanglu@fb.com>
2025-05-15 22:28:27 -07:00
parent 6b31c84aff
commit 3d2779c29a
9 changed files with 55 additions and 27 deletions
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -275,13 +275,13 @@ class Worker(WorkerBase):

        output = self.model_runner.execute_model(scheduler_output,
                                                 intermediate_tensors)
-
-        if not get_pp_group().is_last_rank:
+        parallel_config = self.vllm_config.parallel_config
+        if parallel_config.distributed_executor_backend != "external_launcher" \
+            and not get_pp_group().is_last_rank:
            assert isinstance(output, IntermediateTensors)
            get_pp_group().send_tensor_dict(output.tensors,
                                            all_gather_group=get_tp_group())
            return None
-
        assert isinstance(output, ModelRunnerOutput)
        return output if self.is_driver_worker else None