[Feature] Expert Parallelism Load Balancer (EPLB) (#18343)
Signed-off-by: Bowen Wang <abmfy@icloud.com>
This commit is contained in:
@@ -482,7 +482,15 @@ class AWQMoEMethod(FusedMoEMethodBase):
|
||||
e_score_correction_bias: Optional[torch.Tensor] = None,
|
||||
apply_router_weight_on_input: bool = False,
|
||||
activation: str = "silu",
|
||||
enable_eplb: bool = False,
|
||||
expert_load_view: Optional[torch.Tensor] = None,
|
||||
logical_to_physical_map: Optional[torch.Tensor] = None,
|
||||
logical_replica_count: Optional[torch.Tensor] = None,
|
||||
) -> torch.Tensor:
|
||||
if enable_eplb:
|
||||
raise NotImplementedError(
|
||||
"EPLB not supported for `AWQMoEMethod` yet.")
|
||||
|
||||
assert activation == "silu", "Only SiLU activation is supported."
|
||||
|
||||
if apply_router_weight_on_input:
|
||||
|
||||
Reference in New Issue
Block a user