[Feature]: Remove Chunking From FusedMoE (#34086)

Signed-off-by: SouthWest7 <am1ao@qq.com>
Signed-off-by: Southwest <1403572259@qq.com>
Signed-off-by: southwest <am1ao@qq.com>
Signed-off-by: Xinan Miao <1403572259@qq.com>
Co-authored-by: SouthWest7 <am1ao@qq.com>
2026-03-13 02:24:38 +08:00
parent c973ecdead
commit 2cdf92228c
28 changed files with 152 additions and 523 deletions
--- a/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py
+++ b/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py
@@ -42,12 +42,6 @@ def rank_worker(
 ):
    set_random_seed(pgi.rank)

-    # sanity check
-    from vllm import envs
-
-    if config.fused_moe_chunk_size is not None:
-        assert config.fused_moe_chunk_size == envs.VLLM_FUSED_MOE_CHUNK_SIZE
-
    # get weights to this device
    weights.to_current_device()

@@ -135,7 +129,6 @@ def make_feature_matrix(csv_file_path: str):
            fused_experts_type=experts_type,
            quant_config=quant_config,
            world_size=2,
-            fused_moe_chunk_size=None,
        )

        success = None