[V1] EP/TP MoE + DP Attention (#13931)
commit 72c62eae5f (parent 0a995d5434)
@@ -249,6 +249,7 @@ class PhiMoE(nn.Module):
         params_dtype: Optional[torch.dtype] = None,
         quant_config: Optional[QuantizationConfig] = None,
         tp_size: Optional[int] = None,
+        prefix: str = "",
     ):
         super().__init__()
         self.hidden_size = hidden_size
@@ -272,7 +273,8 @@ class PhiMoE(nn.Module):
             renormalize=False,
             quant_config=quant_config,
             tp_size=tp_size,
-            custom_routing_function=phimoe_routing_function)
+            custom_routing_function=phimoe_routing_function,
+            prefix=f"{prefix}.experts")
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         # NOTE: hidden_states can have either 1D or 2D shape.
@@ -396,6 +398,7 @@ class PhiMoEDecoderLayer(nn.Module):
             hidden_size=config.hidden_size,
             intermediate_size=config.intermediate_size,
             quant_config=quant_config,
+            prefix=f"{prefix}.block_sparse_moe",
         )
         self.input_layernorm = nn.LayerNorm(config.hidden_size,
                                             eps=config.rms_norm_eps,
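The net effect of the hunks above is that each FusedMoE module now receives a fully qualified prefix: the decoder layer passes "{prefix}.block_sparse_moe" into PhiMoE, which in turn passes "{prefix}.experts" into FusedMoE. Below is a minimal sketch (not vLLM code) of how those strings compose; the "model.layers.{i}" root and the helper name are assumptions for illustration, since the layer-level prefix is not shown in this diff.

# Hypothetical illustration of how the prefix strings in this diff compose.
def experts_prefix(layer_idx: int) -> str:
    # Assumed: the decoder layer is constructed with prefix=f"model.layers.{layer_idx}".
    decoder_prefix = f"model.layers.{layer_idx}"
    # PhiMoEDecoderLayer -> PhiMoE (this diff): prefix=f"{prefix}.block_sparse_moe"
    moe_prefix = f"{decoder_prefix}.block_sparse_moe"
    # PhiMoE -> FusedMoE (this diff): prefix=f"{prefix}.experts"
    return f"{moe_prefix}.experts"

assert experts_prefix(0) == "model.layers.0.block_sparse_moe.experts"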