[Distributed] Add enable_expert_parallel arg (#14305)

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Tyler Michael Smith authored on 2025-03-06 13:54:45 -05:00; committed by GitHub
parent cd579352bf
commit cc2f9b32c8
5 changed files with 27 additions and 21 deletions


@@ -754,7 +754,7 @@ class ModelConfig:
                 " must be divisible by tensor parallel size "
                 f"({tensor_parallel_size}).")
 
-        if envs.VLLM_TEST_ENABLE_EP:
+        if parallel_config.enable_expert_parallel:
             self._verify_with_expert_parallelism()
 
         pipeline_parallel_size = parallel_config.pipeline_parallel_size
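
The body of `_verify_with_expert_parallelism()` is not shown in this hunk; the change only swaps its trigger from the test-only `VLLM_TEST_ENABLE_EP` env var to the new config field. As an illustration only (not vLLM's actual implementation), a verifier of this kind would confirm the model actually has MoE experts before expert parallelism is enabled; the attribute names below are assumptions.

# Illustrative sketch -- NOT vLLM's _verify_with_expert_parallelism().
# Checks that the HF config exposes at least one MoE expert; the
# attribute names (num_local_experts / num_experts) are assumptions.
from types import SimpleNamespace


def verify_expert_parallelism(hf_config) -> None:
    num_experts = getattr(
        hf_config, "num_local_experts",
        getattr(hf_config, "num_experts", 0))
    if num_experts < 1:
        raise ValueError(
            "enable_expert_parallel=True requires a Mixture-of-Experts "
            "model with at least one expert.")


# Example: a Mixtral-style config with 8 experts passes the check.
verify_expert_parallelism(SimpleNamespace(num_local_experts=8))
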
@@ -1334,6 +1334,7 @@ class ParallelConfig:
     # IP of the data parallel master.
     data_parallel_master_ip: str = "127.0.0.1"
     data_parallel_master_port: int = 29500  # Port of the data parallel master.
+    enable_expert_parallel: bool = False  # Use EP instead of TP for MoE layers.
 
     # Maximum number of multiple batches
     # when load model sequentially. To avoid RAM OOM when using tensor
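
For reference, a minimal usage sketch of the new flag. It assumes the remaining changed files plumb `enable_expert_parallel` from `ParallelConfig` through `EngineArgs` to the `LLM` entry point (plausible for a five-file change, but not shown in these hunks); the model name and parallel sizes are illustrative.

# Hedged sketch: enabling expert parallelism for an MoE model. Assumes
# enable_expert_parallel is exposed as an LLM/EngineArgs keyword.
from vllm import LLM, SamplingParams

llm = LLM(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",  # an MoE model
    tensor_parallel_size=2,
    enable_expert_parallel=True,  # shard MoE layers with EP instead of TP
)

outputs = llm.generate(["Hello, my name is"],
                       SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)
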