diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
old mode 100755
new mode 100644
index 3935fe374..c3be1be85
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -335,6 +335,7 @@ class FusedMoE(CustomOp):
         expert_mapping: list[tuple[str, str, int, str]] | None = None,
         n_shared_experts: int | None = None,
         router_logits_dtype: torch.dtype | None = None,
+        has_shared_experts: bool = False,
     ):
         super().__init__()

@@ -564,7 +565,7 @@ class FusedMoE(CustomOp):
             device=vllm_config.device_config.device,
             routing_method=self.routing_method_type,
             # TODO: in_dtype == out_dtype?
-            disable_inplace=disable_inplace() or self.shared_experts is not None,
+            disable_inplace=disable_inplace() or has_shared_experts,
         )
         if self.use_mori_kernels:
             assert self.rocm_aiter_fmoe_enabled, (
diff --git a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
index cb601af70..937d13d34
--- a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
@@ -26,6 +26,10 @@ class SharedFusedMoE(FusedMoE):
         routed_input_transform: torch.nn.Module | None = None,
         **kwargs,
     ):
+        # Pass has_shared_experts so FusedMoE.__init__ can set disable_inplace
+        # without accessing self.shared_experts (submodules cannot be set before
+        # Module.__init__()).
+        kwargs["has_shared_experts"] = shared_experts is not None
        super().__init__(**kwargs)
         self._shared_experts = shared_experts
         self._routed_input_transform = routed_input_transform
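
For context, a minimal standalone sketch of the PyTorch ordering constraint this patch works around (the `Parent`/`Child` names are hypothetical stand-ins, not vLLM code): `torch.nn.Module.__setattr__` refuses to register a submodule until `Module.__init__()` has run, so `SharedFusedMoE` cannot stash `shared_experts` on `self` before calling the `FusedMoE` constructor that needs to know whether shared experts exist. Threading a plain bool through `kwargs` sidesteps the ordering problem:

```python
import torch.nn as nn


class Parent(nn.Module):
    """Stands in for FusedMoE: needs the flag during its own __init__."""

    def __init__(self, has_shared_experts: bool = False):
        super().__init__()
        # A plain bool is safe to use here: no submodule registration is
        # involved, unlike reading self.shared_experts would be.
        self.disable_inplace = has_shared_experts


class Child(Parent):
    """Stands in for SharedFusedMoE."""

    def __init__(self, shared_experts: nn.Module | None = None, **kwargs):
        # Assigning the submodule before super().__init__() would raise:
        #   AttributeError: cannot assign module before Module.__init__() call
        # so this is not allowed yet:
        #   self.shared_experts = shared_experts
        kwargs["has_shared_experts"] = shared_experts is not None
        super().__init__(**kwargs)
        # Module.__init__() has now run, so submodule assignment works.
        self._shared_experts = shared_experts


m = Child(shared_experts=nn.Linear(4, 4))
assert m.disable_inplace is True
```

This mirrors the patch: `SharedFusedMoE` computes `shared_experts is not None` before `super().__init__(**kwargs)`, and `FusedMoE` consumes the bool instead of touching `self.shared_experts`.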