[V1] EP/TP MoE + DP Attention (#13931)
commit 72c62eae5f · parent 0a995d5434 · committed by GitHub

This change threads a prefix argument through Qwen2MoeSparseMoeBlock so that the fused experts and each decoder layer's MLP are constructed with fully qualified dotted module names (e.g. model.layers.N.mlp.experts).
@@ -100,6 +100,7 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
         self,
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
+        prefix: str = "",
     ):
         super().__init__()
         self.tp_size = get_tensor_model_parallel_world_size()
@@ -115,7 +116,8 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
                                 intermediate_size=config.moe_intermediate_size,
                                 reduce_results=False,
                                 renormalize=config.norm_topk_prob,
-                                quant_config=quant_config)
+                                quant_config=quant_config,
+                                prefix=f"{prefix}.experts")

         self.gate = ReplicatedLinear(config.hidden_size,
                                      config.num_experts,
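The prefix handed to FusedMoE gives the experts submodule a fully qualified dotted name. The sketch below is a toy illustration of why such names are useful: a per-module table keyed by qualified names can be consulted at construction time. The table, helper, and values here are hypothetical, not vLLM's API:

# Toy sketch (hypothetical, not vLLM code): a per-module setting resolved
# by fully qualified name. "model.layers.0.mlp.experts" is the kind of name
# that f"{prefix}.experts" in the hunk above produces.
PER_MODULE_OVERRIDES = {
    "model.layers.0.mlp.experts": "fp8",  # hypothetical entry
}

def lookup_override(prefix: str, default: str = "none") -> str:
    # Hypothetical helper: resolve a setting for one module by its name.
    return PER_MODULE_OVERRIDES.get(prefix, default)

print(lookup_override("model.layers.0.mlp.experts"))  # fp8
print(lookup_override("model.layers.1.mlp.experts"))  # none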
@@ -277,7 +279,8 @@ class Qwen2MoeDecoderLayer(nn.Module):
                 config.num_experts > 0 and
             (layer_idx + 1) % config.decoder_sparse_step == 0):
             self.mlp = Qwen2MoeSparseMoeBlock(config=config,
-                                              quant_config=quant_config)
+                                              quant_config=quant_config,
+                                              prefix=f"{prefix}.mlp")
         else:
             self.mlp = Qwen2MoeMLP(
                 hidden_size=config.hidden_size,
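Taken together, the hunks thread one prefix from the decoder layer into the MoE block and on into the experts, and the empty-string default added in the first hunk keeps existing call sites working. A minimal self-contained sketch of the same threading pattern, with toy classes standing in for the real Qwen2Moe modules:

# Minimal sketch of the prefix threading introduced by this commit.
# Toy stand-ins for Qwen2MoeDecoderLayer / Qwen2MoeSparseMoeBlock / FusedMoE;
# only the naming logic is modeled.
class Experts:
    def __init__(self, prefix: str = ""):
        self.prefix = prefix

class SparseMoeBlock:
    def __init__(self, prefix: str = ""):
        self.prefix = prefix
        # Mirrors FusedMoE(..., prefix=f"{prefix}.experts") above.
        self.experts = Experts(prefix=f"{prefix}.experts")

class DecoderLayer:
    def __init__(self, prefix: str = ""):
        # Mirrors Qwen2MoeSparseMoeBlock(..., prefix=f"{prefix}.mlp") above.
        self.mlp = SparseMoeBlock(prefix=f"{prefix}.mlp")

layer = DecoderLayer(prefix="model.layers.3")
print(layer.mlp.prefix)          # model.layers.3.mlp
print(layer.mlp.experts.prefix)  # model.layers.3.mlp.experts

The "model.layers.3" root is an assumption about how callers name decoder layers; the composition of ".mlp" and ".experts" is exactly what the diff adds.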