Expert Parallelism (EP) Support for DeepSeek V2 (#12583)

This commit is contained in:
Jongseok Park
2025-02-24 07:33:20 -08:00
committed by GitHub
parent 7940d8a6a7
commit 781096e385
19 changed files with 527 additions and 59 deletions

View File

@@ -106,10 +106,6 @@ class DeepseekV2MoE(nn.Module):
self.routed_scaling_factor = config.routed_scaling_factor
self.n_shared_experts = config.n_shared_experts
self.routed_scaling_factor = config.routed_scaling_factor
if self.tp_size > config.n_routed_experts:
raise ValueError(
f"Tensor parallel size {self.tp_size} is greater than "
f"the number of experts {config.n_routed_experts}.")
if config.hidden_act != "silu":
raise ValueError(f"Unsupported activation: {config.hidden_act}. "