[BugFix][Performance] Restore flashinfer autotuning for all scenarios (#27904)

This commit was authored by Varun Sundar Rabindranath on 2025-11-04 02:56:21 -05:00 and committed via GitHub.
parent 53f6e81dfd
commit 4022a9d279
4 changed files with 14 additions and 44 deletions

View File

@@ -172,21 +172,9 @@ def test_gptoss_mxfp4mxfp8_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch
can_initialize("openai/gpt-oss-20b", hf_overrides=HF_OVERRIDE_TEXT)
def test_gptoss_dp2_mxfp4mxfp8_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8", "1")
monkeypatch.setenv("VLLM_ALL2ALL_BACKEND", "deepep_high_throughput")
def test_gptoss_eager(monkeypatch: pytest.MonkeyPatch):
can_initialize(
"openai/gpt-oss-20b",
extra_args=["--data-parallel-size", "2", "--enable-expert-parallel"],
hf_overrides=HF_OVERRIDE_TEXT,
)
def test_gptoss_dp2_mxfp4bf16_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_BF16", "1")
monkeypatch.setenv("VLLM_ALL2ALL_BACKEND", "deepep_high_throughput")
can_initialize(
"openai/gpt-oss-20b",
extra_args=["--data-parallel-size", "2", "--enable-expert-parallel"],
hf_overrides=HF_OVERRIDE_TEXT,
extra_args=["--enforce-eager"],
)