[Feature]: Remove Chunking From FusedMoE (#34086)

Signed-off-by: SouthWest7 <am1ao@qq.com>
Signed-off-by: Southwest <1403572259@qq.com>
Signed-off-by: southwest <am1ao@qq.com>
Signed-off-by: Xinan Miao <1403572259@qq.com>
Co-authored-by: SouthWest7 <am1ao@qq.com>
This commit is contained in:
Xinan Miao
2026-03-13 02:24:38 +08:00
committed by GitHub
parent c973ecdead
commit 2cdf92228c
28 changed files with 152 additions and 523 deletions

View File

@@ -204,7 +204,6 @@ def test_flashinfer_per_tensor_moe_fp8_no_graph(
if not current_platform.has_device_capability(100):
pytest.skip("Test is only supported for sm >= 100")
set_random_seed(7)
monkeypatch.setenv("VLLM_FUSED_MOE_CHUNK_SIZE", "8192")
with set_current_vllm_config(vllm_config):
td = TestData.make_moe_tensors_8bit(
m, k, n, e, is_trtllm=True, activation=activation
@@ -289,7 +288,6 @@ def test_flashinfer_cutlass_moe_fp8_no_graph(
workspace_init,
):
set_random_seed(7)
monkeypatch.setenv("VLLM_FUSED_MOE_CHUNK_SIZE", "8192")
with set_current_vllm_config(vllm_config):
td = TestData.make_moe_tensors_8bit(
m, k, n, e, is_trtllm=False, activation=activation