diff --git a/vllm/model_executor/layers/fused_moe/runner/shared_experts.py b/vllm/model_executor/layers/fused_moe/runner/shared_experts.py
index 6d2189cb4..bb8645c8f 100644
--- a/vllm/model_executor/layers/fused_moe/runner/shared_experts.py
+++ b/vllm/model_executor/layers/fused_moe/runner/shared_experts.py
@@ -93,24 +93,27 @@ class SharedExperts:
     )
 
     @property
-    def _has_external_experts(self) -> bool:
+    def _use_external_experts(self) -> bool:
+        if self._use_dp_chunking:
+            return False
+
         # Disable shared expert overlap if:
         # - we are using eplb with non-default backend, because of correctness issues
         # - we are using flashinfer with DP, since there nothing to gain
         backend = self._moe_config.moe_parallel_config.all2all_backend
         return not (
             (
                 self._moe_config.moe_parallel_config.enable_eplb
                 and backend != "allgather_reducescatter"
             )
             or self._moe_config.moe_parallel_config.use_fi_nvl_two_sided_kernels
         )
 
     def _determine_shared_experts_order(
         self,
         hidden_states: torch.Tensor,
     ) -> SharedExpertsOrder:
-        if self._has_external_experts and not self._use_dp_chunking:
+        if self._use_external_experts:
             return SharedExpertsOrder.EXTERNAL
 
         if self._quant_method.mk_owns_shared_expert: