Expert Parallelism (EP) Support for DeepSeek V2 (#12583)
This commit is contained in:
@@ -677,6 +677,23 @@ class ModelConfig:
|
||||
"fallback to the eager mode.")
|
||||
self.enforce_eager = True
|
||||
|
||||
def _verify_with_expert_parallelism(self) -> None:
    """Validate that the model declares experts for expert parallelism.

    Looks up the expert count on ``self.hf_text_config`` under each of the
    attribute names used by the supported model families and stops at the
    first one holding a positive value.

    Raises:
        ValueError: If none of the known attributes holds a positive
            expert count.
    """
    # The expert count lives under a different attribute name depending on
    # the model family.
    num_expert_names = [
        "moe_num_experts",  # Dbrx
        "num_experts",  # Jamba
        "n_routed_experts",  # DeepSeek
        "num_local_experts",  # Mixtral
    ]
    num_experts = 0
    for name in num_expert_names:
        # An attribute may be present but set to None; treat that like a
        # missing attribute so the comparison below does not raise
        # TypeError and the remaining names are still consulted.
        num_experts = getattr(self.hf_text_config, name, None) or 0
        if num_experts > 0:
            break
    if num_experts < 1:
        raise ValueError(
            "Number of experts in the model must be greater than 0 "
            "when expert parallelism is enabled.")
|
||||
|
||||
def verify_async_output_proc(self, parallel_config, speculative_config,
|
||||
device_config) -> None:
|
||||
if not self.use_async_output_proc:
|
||||
@@ -730,6 +747,9 @@ class ModelConfig:
|
||||
" must be divisible by tensor parallel size "
|
||||
f"({tensor_parallel_size}).")
|
||||
|
||||
if envs.VLLM_TEST_ENABLE_EP:
|
||||
self._verify_with_expert_parallelism()
|
||||
|
||||
pipeline_parallel_size = parallel_config.pipeline_parallel_size
|
||||
if pipeline_parallel_size > 1:
|
||||
architectures = getattr(self.hf_config, "architectures", [])
|
||||
|
||||
Reference in New Issue
Block a user