From bf0f3a4638869e3fdb2df38b61f41557acc69dfe Mon Sep 17 00:00:00 2001 From: kzwrime <66299956+kzwrime@users.noreply.github.com> Date: Tue, 6 Jan 2026 20:06:20 +0800 Subject: [PATCH] [Bugfix] Fix torch.compile error for DP + MoE on CPU Backend (#31650) Signed-off-by: kunzh --- vllm/model_executor/layers/fused_moe/layer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 323e0ee09..374dffde5 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -1899,11 +1899,11 @@ class FusedMoE(CustomOp): ) post_quant_allgather = ( - has_flashinfer_trtllm_fused_moe() - and self.quant_method is not None + self.quant_method is not None and self.dp_size > 1 and self.use_ep and isinstance(self.quant_method, ModelOptNvFp4FusedMoE) + and has_flashinfer_trtllm_fused_moe() ) if post_quant_allgather: hidden_states_to_dispatch, extra_tensors = (