From 1a834df2d40d3279712e5c8d4b8c326d52b1f523 Mon Sep 17 00:00:00 2001 From: Pleaplusone Date: Tue, 30 Dec 2025 17:21:49 +0800 Subject: [PATCH] [ROCm][Bugfix] Fix accuracy issue on fmoe when `VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS` enabled (#31523) Signed-off-by: ganyi --- vllm/platforms/rocm.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index b95287906..785e457fc 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -407,8 +407,10 @@ class RocmPlatform(Platform): compilation_config = vllm_config.compilation_config parallel_config = vllm_config.parallel_config is_eager_execution = compilation_config == CUDAGraphMode.NONE + use_aiter_fused_moe = rocm_aiter_ops.is_fused_moe_enabled() use_aiter_rms_norm = rocm_aiter_ops.is_rmsnorm_enabled() use_aiter_fp8_linear = rocm_aiter_ops.is_linear_fp8_enabled() + use_aiter_fused_se = rocm_aiter_ops.is_fusion_moe_shared_experts_enabled() if compilation_config.cudagraph_mode.has_full_cudagraphs(): # decode context parallel does not support full cudagraphs @@ -458,6 +460,22 @@ class RocmPlatform(Platform): if use_aiter_fp8_linear and "-quant_fp8" not in compilation_config.custom_ops: compilation_config.custom_ops.append("+quant_fp8") + if use_aiter_fused_se and "-grouped_topk" in compilation_config.custom_ops: + logger.warning_once( + "VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS is enabled, which " + "requires the 'grouped_topk' custom op. Overriding the " + "user-provided '-grouped_topk'." + ) + compilation_config.custom_ops.remove("-grouped_topk") + # Ensure grouped_topk is always enabled when using AITER if + # its not disabled by user + if ( + use_aiter_fused_moe + and "+grouped_topk" not in compilation_config.custom_ops + and "-grouped_topk" not in compilation_config.custom_ops + ): + compilation_config.custom_ops.append("+grouped_topk") + @classmethod def verify_model_arch(cls, model_arch: str) -> None: if model_arch in _ROCM_UNSUPPORTED_MODELS: