[MoE Refactor][5/N] Isolate zero expert to LongCatFlash (#28891)
Signed-off-by: baonudesifeizhai <85092850+baonudesifeizhai@users.noreply.github.com> Signed-off-by: Dongjie Zou <85092850+baonudesifeizhai@users.noreply.github.com> Signed-off-by: baonudesifeizhai <baonudesifeizhai@gmail.com> Signed-off-by: Robert Shaw <robertgshaw2@gmail.com> Co-authored-by: Robert Shaw <robshaw@redhat.com> Co-authored-by: Robert Shaw <robertgshaw2@gmail.com>
This commit is contained in:
@@ -1292,13 +1292,11 @@ class Fp8MoEMethod(FusedMoEMethodBase):
|
||||
apply_router_weight_on_input=layer.apply_router_weight_on_input,
|
||||
)
|
||||
|
||||
select_result = layer.select_experts(
|
||||
topk_weights, topk_ids = layer.select_experts(
|
||||
hidden_states=x,
|
||||
router_logits=router_logits,
|
||||
)
|
||||
|
||||
topk_weights, topk_ids, zero_expert_result = select_result
|
||||
|
||||
if self.rocm_aiter_moe_enabled:
|
||||
from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( # noqa: E501
|
||||
rocm_aiter_fused_experts,
|
||||
@@ -1353,13 +1351,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
|
||||
apply_router_weight_on_input=layer.apply_router_weight_on_input,
|
||||
)
|
||||
|
||||
if layer.zero_expert_num != 0 and layer.zero_expert_type is not None:
|
||||
assert not isinstance(result, tuple), (
|
||||
"Shared + zero experts are mutually exclusive not yet supported"
|
||||
)
|
||||
return result, zero_expert_result
|
||||
else:
|
||||
return result
|
||||
return result
|
||||
|
||||
|
||||
class Fp8OnlineMoEMethod(Fp8MoEMethod):
|
||||
|
||||
Reference in New Issue
Block a user