[Bugfix] Disable moe inplace for torch >= 2.9 (#26497)

Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
bnellnm
2025-10-09 14:07:38 -04:00
committed by GitHub
parent 4069db3f2e
commit a462331e36
4 changed files with 22 additions and 6 deletions

View File

@@ -13,6 +13,7 @@ from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
from vllm.model_executor.layers.fused_moe.utils import (
_resize_cache,
count_expert_num_tokens,
disable_inplace,
)
from vllm.utils import cdiv
from vllm.v1.worker.ubatching import (
@@ -1139,7 +1140,7 @@ class FusedMoEModularKernel(torch.nn.Module):
- torch.Tensor: The output tensor after applying the MoE layer.
"""
-        if inplace and self.shared_experts is None:
+        if inplace and self.shared_experts is None and not disable_inplace():
output = hidden_states
else:
output = torch.zeros_like(hidden_states)