[BugFix][Performance] Restore flashinfer autotuning for all scenarios (#27904)
This commit is contained in:
committed by
GitHub
parent
53f6e81dfd
commit
4022a9d279
@@ -172,21 +172,9 @@ def test_gptoss_mxfp4mxfp8_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch
     can_initialize("openai/gpt-oss-20b", hf_overrides=HF_OVERRIDE_TEXT)
 
 
-def test_gptoss_dp2_mxfp4mxfp8_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
-    monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8", "1")
-    monkeypatch.setenv("VLLM_ALL2ALL_BACKEND", "deepep_high_throughput")
+def test_gptoss_eager(monkeypatch: pytest.MonkeyPatch):
     can_initialize(
         "openai/gpt-oss-20b",
-        extra_args=["--data-parallel-size", "2", "--enable-expert-parallel"],
         hf_overrides=HF_OVERRIDE_TEXT,
-    )
-
-
-def test_gptoss_dp2_mxfp4bf16_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
-    monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_BF16", "1")
-    monkeypatch.setenv("VLLM_ALL2ALL_BACKEND", "deepep_high_throughput")
-    can_initialize(
-        "openai/gpt-oss-20b",
-        extra_args=["--data-parallel-size", "2", "--enable-expert-parallel"],
-        hf_overrides=HF_OVERRIDE_TEXT,
+        extra_args=["--enforce-eager"],
     )
|
||||
|
||||
Reference in New Issue
Block a user