[MoE Refactor] MXFP4 Cutlass Experts to MK (#34542)

Signed-off-by: Yongye Zhu <zyy1102000@gmail.com>
2026-02-25 17:32:39 -08:00
parent cbf8f7028c
commit 1976356ee6
19 changed files with 454 additions and 169 deletions
--- a/vllm/model_executor/layers/fused_moe/modular_kernel.py
+++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py
@@ -564,9 +564,13 @@ class FusedMoEPermuteExpertsUnpermute(ABC):
    #

    @property
-    def quant_dtype(self) -> torch.dtype | None:
+    def quant_dtype(self) -> torch.dtype | str | None:
        return self.quant_config.quant_dtype

+    @property
+    def weight_quant_dtype(self) -> torch.dtype | str | None:
+        return self.quant_config.weight_quant_dtype
+
    @property
    def block_shape(self) -> list[int] | None:
        return self.quant_config.block_shape