[Feature] Support Pipeline Parallelism in torchrun SPMD offline inference for V1 (#17827)
Signed-off-by: Lucia Fang <fanglu@fb.com>
This commit is contained in:
@@ -1695,7 +1695,6 @@ class ParallelConfig:
    """Port of the data parallel master."""

    enable_expert_parallel: bool = False
    """Use expert parallelism instead of tensor parallelism for MoE layers."""

    max_parallel_loading_workers: Optional[int] = None
    """Maximum number of parallel loading workers when loading model
    sequentially in multiple batches. To avoid RAM OOM when using tensor
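
The hunk above shows configuration fields on `ParallelConfig`. As a minimal sketch of how such dataclass-style config fields behave, the fragment below mirrors the two fields visible in the diff; it is a simplified stand-in, not the actual vLLM class, which has many more fields and validation logic.

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class ParallelConfig:
    # Sketch of the fields shown in the hunk; the real vLLM
    # ParallelConfig defines many additional parallelism options.
    enable_expert_parallel: bool = False
    """Use expert parallelism instead of tensor parallelism for MoE layers."""

    max_parallel_loading_workers: Optional[int] = None
    """Maximum number of parallel loading workers when loading the model
    sequentially in multiple batches (None means no limit)."""


# Defaults apply unless overridden, as with any dataclass.
cfg = ParallelConfig(enable_expert_parallel=True)
print(cfg.enable_expert_parallel, cfg.max_parallel_loading_workers)
```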