[Bugfix] Disable moe inplace for torch >= 2.9 (#26497)

Signed-off-by: Bill Nell <bnell@redhat.com>
Author: bnellnm
Date: 2025-10-09 14:07:38 -04:00
Committed by: GitHub
Parent: 4069db3f2e
Commit: a462331e36
4 changed files with 22 additions and 6 deletions


@@ -39,6 +39,7 @@ from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
 from vllm.model_executor.layers.fused_moe.utils import (
     _resize_cache,
     activation_without_mul,
+    disable_inplace,
     moe_kernel_quantize_input,
 )
 from vllm.model_executor.layers.quantization.utils.mxfp4_utils import dequant_mxfp4
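The hunk above only imports `disable_inplace`; its definition lives in `vllm/model_executor/layers/fused_moe/utils.py` (one of the four changed files, not shown in this view). A minimal sketch of what such a helper could look like, assuming from the commit title that it simply gates on torch >= 2.9:

    # Hypothetical sketch; the real helper in fused_moe/utils.py may differ.
    from packaging.version import Version

    import torch

    def disable_inplace() -> bool:
        # Assumed behavior, taken from the commit title rather than this
        # diff: report True on torch 2.9 and newer so callers fall back
        # to the out-of-place path.
        return Version(torch.__version__) >= Version("2.9")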
@@ -1516,7 +1517,7 @@ def torch_vllm_outplace_fused_experts(**kwargs) -> torch.Tensor:
 def dispatch_fused_experts_func(inplace: bool) -> Callable[..., torch.Tensor]:
-    if inplace:
+    if inplace and not disable_inplace():
         return torch_vllm_inplace_fused_experts
     return torch_vllm_outplace_fused_experts
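Net effect of the guard above: a caller that passes `inplace=True` now silently receives the out-of-place implementation whenever `disable_inplace()` reports True. A self-contained illustration of the same dispatch pattern, using toy function names that are not part of the diff:

    from typing import Callable

    import torch

    def _inplace_double(x: torch.Tensor) -> torch.Tensor:
        return x.mul_(2.0)  # mutates the caller's tensor in place

    def _outplace_double(x: torch.Tensor) -> torch.Tensor:
        return x * 2.0  # allocates a fresh result tensor

    def dispatch(inplace: bool, inplace_disabled: bool) -> Callable[[torch.Tensor], torch.Tensor]:
        # Same shape as dispatch_fused_experts_func: honor the inplace
        # request only when in-place execution is still allowed.
        if inplace and not inplace_disabled:
            return _inplace_double
        return _outplace_double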
@@ -1766,7 +1767,10 @@ def fused_experts_impl(
     else:
         raise ValueError(f"Unsupported compute_type: {hidden_states.dtype}")
-    out_hidden_states = hidden_states if inplace else torch.empty_like(hidden_states)
+    if inplace and not disable_inplace():
+        out_hidden_states = hidden_states
+    else:
+        out_hidden_states = torch.empty_like(hidden_states)
     if ocp_mx_scheme is not None:
         # TODO: On platforms for which `current_platform.supports_mx()` is True
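The same guard drives the output-buffer choice: before this commit, `inplace=True` always aliased `out_hidden_states` to `hidden_states`; now a fresh buffer is allocated when in-place mode is disabled. A toy demonstration of the aliasing difference (illustrative names only, not the vLLM code path):

    import torch

    def select_output(hidden_states: torch.Tensor, inplace: bool, inplace_disabled: bool) -> torch.Tensor:
        # Post-commit rule: reuse the input buffer only when in-place mode
        # is requested *and* still permitted; otherwise allocate.
        if inplace and not inplace_disabled:
            return hidden_states
        return torch.empty_like(hidden_states)

    x = torch.randn(4, 8)
    assert select_output(x, True, False).data_ptr() == x.data_ptr()  # aliased
    assert select_output(x, True, True).data_ptr() != x.data_ptr()   # fresh buffer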