[Bugfix] Disable moe inplace for torch >= 2.9 (#26497)

Signed-off-by: Bill Nell <bnell@redhat.com>
Author: bnellnm
Date: 2025-10-09 14:07:38 -04:00
Committed by: GitHub
Parent: 4069db3f2e
Commit: a462331e36
4 changed files with 22 additions and 6 deletions


@@ -39,6 +39,7 @@ from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
 from vllm.model_executor.layers.fused_moe.utils import (
     _resize_cache,
     activation_without_mul,
+    disable_inplace,
     moe_kernel_quantize_input,
 )
 from vllm.model_executor.layers.quantization.utils.mxfp4_utils import dequant_mxfp4
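The hunk above only imports `disable_inplace`; its definition lives in `vllm/model_executor/layers/fused_moe/utils.py` (one of the four changed files, not shown in this view). A minimal sketch of what such a helper could look like, assuming from the commit title that it simply gates on torch >= 2.9:

    # Hypothetical sketch; the real helper in fused_moe/utils.py may differ.
    from packaging.version import Version

    import torch

    def disable_inplace() -> bool:
        # Assumed behavior, taken from the commit title rather than this
        # diff: report True on torch 2.9 and newer so callers fall back
        # to the out-of-place path.
        return Version(torch.__version__) >= Version("2.9")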
@@ -1516,7 +1517,7 @@ def torch_vllm_outplace_fused_experts(**kwargs) -> torch.Tensor:
 def dispatch_fused_experts_func(inplace: bool) -> Callable[..., torch.Tensor]:
-    if inplace:
+    if inplace and not disable_inplace():
         return torch_vllm_inplace_fused_experts
     return torch_vllm_outplace_fused_experts
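Net effect of the guard above: a caller that passes `inplace=True` now silently receives the out-of-place implementation whenever `disable_inplace()` reports True. A self-contained illustration of the same dispatch pattern, using toy function names that are not part of the diff:

    from typing import Callable

    import torch

    def _inplace_double(x: torch.Tensor) -> torch.Tensor:
        return x.mul_(2.0)  # mutates the caller's tensor in place

    def _outplace_double(x: torch.Tensor) -> torch.Tensor:
        return x * 2.0  # allocates a fresh result tensor

    def dispatch(inplace: bool, inplace_disabled: bool) -> Callable[[torch.Tensor], torch.Tensor]:
        # Same shape as dispatch_fused_experts_func: honor the inplace
        # request only when in-place execution is still allowed.
        if inplace and not inplace_disabled:
            return _inplace_double
        return _outplace_double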
@@ -1766,7 +1767,10 @@ def fused_experts_impl(
     else:
         raise ValueError(f"Unsupported compute_type: {hidden_states.dtype}")
-    out_hidden_states = hidden_states if inplace else torch.empty_like(hidden_states)
+    if inplace and not disable_inplace():
+        out_hidden_states = hidden_states
+    else:
+        out_hidden_states = torch.empty_like(hidden_states)
     if ocp_mx_scheme is not None:
         # TODO: On platforms for which `current_platform.supports_mx()` is True
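The same guard drives the output-buffer choice: before this commit, `inplace=True` always aliased `out_hidden_states` to `hidden_states`; now a fresh buffer is allocated when in-place mode is disabled. A toy demonstration of the aliasing difference (illustrative names only, not the vLLM code path):

    import torch

    def select_output(hidden_states: torch.Tensor, inplace: bool, inplace_disabled: bool) -> torch.Tensor:
        # Post-commit rule: reuse the input buffer only when in-place mode
        # is requested *and* still permitted; otherwise allocate.
        if inplace and not inplace_disabled:
            return hidden_states
        return torch.empty_like(hidden_states)

    x = torch.randn(4, 8)
    assert select_output(x, True, False).data_ptr() == x.data_ptr()  # aliased
    assert select_output(x, True, True).data_ptr() != x.data_ptr()   # fresh buffer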