From 3628bcaaf229f3ce86b64e73ab88dd64211ddf38 Mon Sep 17 00:00:00 2001
From: Zhiwei <532707544@qq.com>
Date: Fri, 5 Dec 2025 19:01:16 +0800
Subject: [PATCH] [ROCm][MXFP4] Infer w4a4 quant method in rocm aiter fused
 moe (#29775)

Signed-off-by: ZhiweiYan-96
---
 vllm/model_executor/layers/fused_moe/config.py               | 4 ++++
 vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/config.py b/vllm/model_executor/layers/fused_moe/config.py
index 1826fafa8..e52845dfa 100644
--- a/vllm/model_executor/layers/fused_moe/config.py
+++ b/vllm/model_executor/layers/fused_moe/config.py
@@ -345,6 +345,10 @@ class FusedMoEQuantConfig:
     def use_mxfp4_w4a16(self) -> bool:
         return self._a1.dtype is None and self._w1.dtype == "mxfp4"
 
+    @property
+    def use_mxfp4_w4a4(self) -> bool:
+        return self._a1.dtype == "mxfp4" and self._w1.dtype == "mxfp4"
+
     @property
     def use_nvfp4_w4a4(self) -> bool:
         return self.quant_dtype == "nvfp4"
diff --git a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py b/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py
index 8f05828d7..882ad0a53 100644
--- a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py
@@ -221,8 +221,8 @@ def rocm_aiter_fused_experts(
     else:
         quant_method = QuantMethod.NO.value
 
-    # quark moe for mxfp4 w_dtype
-    if quant_config.use_mxfp4_w4a16:
+    # quark moe for mxfp4 w_dtype mxfp4 a_dtype
+    if quant_config.use_mxfp4_w4a4:
         quant_method = QuantMethod.BLOCK_1X32.value
     # w8a8 block-scaled
     if quant_config.block_shape is not None and quant_config.use_fp8_w8a8: