[Feature]: Remove Chunking From FusedMoE (#34086)

Signed-off-by: SouthWest7 <am1ao@qq.com>
Signed-off-by: Southwest <1403572259@qq.com>
Signed-off-by: southwest <am1ao@qq.com>
Signed-off-by: Xinan Miao <1403572259@qq.com>
Co-authored-by: SouthWest7 <am1ao@qq.com>
2026-03-13 02:24:38 +08:00
parent c973ecdead
commit 2cdf92228c
28 changed files with 152 additions and 523 deletions
--- a/tests/kernels/moe/modular_kernel_tools/mk_objects.py
+++ b/tests/kernels/moe/modular_kernel_tools/mk_objects.py
@@ -64,7 +64,6 @@ class ExpertInfo:
    activation_format: mk.FusedMoEActivationFormat
    supported_dtypes: list[torch.dtype | str]
    blocked_quantization_support: bool
-    supports_chunking: bool
    supports_expert_map: bool
    needs_matching_quant: bool = False
    needs_deep_gemm: bool = False
@@ -127,7 +126,6 @@ def register_experts(
    activation_format: mk.FusedMoEActivationFormat,
    supported_dtypes: list[torch.dtype | str],
    blocked_quantization_support: bool,
-    supports_chunking: bool,
    supports_expert_map: bool,
    needs_matching_quant: bool = False,
    needs_deep_gemm: bool = False,
@@ -141,7 +139,6 @@ def register_experts(
        activation_format,
        supported_dtypes,
        blocked_quantization_support,
-        supports_chunking,
        supports_expert_map,
        needs_matching_quant,
        needs_deep_gemm,
@@ -176,7 +173,6 @@ register_experts(
    batched_format,
    common_float_types,
    blocked_quantization_support=True,
-    supports_chunking=False,
    supports_expert_map=False,
    needs_matching_quant=True,
 )
@@ -186,7 +182,6 @@ register_experts(
    standard_format,
    common_float_and_int_types,
    blocked_quantization_support=True,
-    supports_chunking=True,
    supports_expert_map=True,
    needs_matching_quant=True,
 )
@@ -196,7 +191,6 @@ register_experts(
    batched_format,
    common_float_and_int_types,
    blocked_quantization_support=True,
-    supports_chunking=False,
    supports_expert_map=True,
 )

@@ -262,7 +256,6 @@ if has_flashinfer_cutlass_fused_moe() and current_platform.has_device_capability
        standard_format,
        nvfp4_types + fp8_types,
        blocked_quantization_support=True,
-        supports_chunking=True,
        # Note: this is a hack to get it to run for now
        supports_expert_map=True,
    )
@@ -281,7 +274,6 @@ if has_aiter():
        standard_format,
        fp8_types,
        blocked_quantization_support=True,
-        supports_chunking=True,
        supports_expert_map=True,
        needs_aiter=True,
    )
@@ -294,7 +286,6 @@ if has_deep_gemm() and is_deep_gemm_supported():
        batched_format,
        fp8_types,
        blocked_quantization_support=True,
-        supports_chunking=False,
        supports_expert_map=False,
        needs_matching_quant=False,
        needs_deep_gemm=True,
@@ -304,7 +295,6 @@ if has_deep_gemm() and is_deep_gemm_supported():
        standard_format,
        fp8_types,
        blocked_quantization_support=True,
-        supports_chunking=True,
        supports_expert_map=True,
        needs_matching_quant=False,
        needs_deep_gemm=True,
@@ -314,7 +304,6 @@ if has_deep_gemm() and is_deep_gemm_supported():
        standard_format,
        common_float_and_int_types,
        blocked_quantization_support=True,
-        supports_chunking=True,
        supports_expert_map=True,
        needs_matching_quant=True,
        needs_deep_gemm=True,
@@ -331,7 +320,6 @@ if cutlass_fp8_supported():
        standard_format,
        fp8_types,
        blocked_quantization_support=False,
-        supports_chunking=True,
        supports_expert_map=False,
    )
    register_experts(
@@ -339,7 +327,6 @@ if cutlass_fp8_supported():
        batched_format,
        fp8_types,
        blocked_quantization_support=False,
-        supports_chunking=False,
        supports_expert_map=False,
    )
 else:
@@ -354,7 +341,6 @@ if cutlass_fp4_supported():
        standard_format,
        nvfp4_types,
        blocked_quantization_support=True,
-        supports_chunking=True,
        supports_expert_map=False,
    )
 else: