[Bugfix] Disable moe inplace for torch >= 2.9 (#26497)

Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
bnellnm
2025-10-09 14:07:38 -04:00
committed by GitHub
parent 4069db3f2e
commit a462331e36
4 changed files with 22 additions and 6 deletions

View File

@@ -13,6 +13,7 @@ from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
from vllm.model_executor.layers.fused_moe.utils import (
_resize_cache,
count_expert_num_tokens,
disable_inplace,
)
from vllm.utils import cdiv
from vllm.v1.worker.ubatching import (
@@ -1139,7 +1140,7 @@ class FusedMoEModularKernel(torch.nn.Module):
- torch.Tensor: The output tensor after applying the MoE layer.
"""
-        if inplace and self.shared_experts is None:
+        if inplace and self.shared_experts is None and not disable_inplace():
output = hidden_states
else:
output = torch.zeros_like(hidden_states)