[Distributed] Add enable_expert_parallel arg (#14305)
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
commit cc2f9b32c8
parent cd579352bf
committed by GitHub
@@ -754,7 +754,7 @@ class ModelConfig:
                 " must be divisible by tensor parallel size "
                 f"({tensor_parallel_size}).")
 
-        if envs.VLLM_TEST_ENABLE_EP:
+        if parallel_config.enable_expert_parallel:
             self._verify_with_expert_parallelism()
 
         pipeline_parallel_size = parallel_config.pipeline_parallel_size
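The hunk above replaces a test-only environment toggle (VLLM_TEST_ENABLE_EP) with a first-class configuration field, so expert-parallel verification is opted into per run rather than per process environment. A minimal sketch of the resulting gating pattern, assuming a dataclass-style ParallelConfig like the one in the next hunk; verify_model and its body are illustrative placeholders, not vLLM APIs:

from dataclasses import dataclass

@dataclass
class ParallelConfig:
    tensor_parallel_size: int = 1
    enable_expert_parallel: bool = False  # EP instead of TP for MoE layers

def verify_model(parallel_config: ParallelConfig) -> None:
    # The check is driven by explicit configuration rather than an
    # environment variable, so callers opt in through the config object.
    if parallel_config.enable_expert_parallel:
        print("verifying expert-parallel constraints")

verify_model(ParallelConfig(enable_expert_parallel=True))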
@@ -1334,6 +1334,7 @@ class ParallelConfig:
     # IP of the data parallel master.
     data_parallel_master_ip: str = "127.0.0.1"
     data_parallel_master_port: int = 29500  # Port of the data parallel master.
+    enable_expert_parallel: bool = False  # Use EP instead of TP for MoE layers.
 
     # Maximum number of multiple batches
     # when load model sequentially. To avoid RAM OOM when using tensor
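For context, a hedged usage sketch of the new field. Keyword arguments to vLLM's LLM(...) entrypoint are forwarded to the engine arguments, so once this commit is in your installed version the flag should be reachable as below; that it is exposed at this level under exactly this name is an assumption based on the field added in the hunk above:

from vllm import LLM

# Assumes enable_expert_parallel is forwarded from LLM(...) down to
# ParallelConfig; the model name is just an example MoE checkpoint.
llm = LLM(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    tensor_parallel_size=2,
    enable_expert_parallel=True,  # use EP instead of TP for MoE layers
)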