[Bugfix] Fix gpt-oss w4a8 DP/EP on B200 (#26729)

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
Varun Sundar Rabindranath
2025-10-21 01:51:14 -04:00
committed by GitHub
parent f95da13c3d
commit 5ff5d94e77
5 changed files with 82 additions and 2 deletions

View File

@@ -170,3 +170,23 @@ def test_gptoss_mxfp4mxfp8_moe_flashinfer_cutlass(monkeypatch: pytest.MonkeyPatc
def test_gptoss_mxfp4mxfp8_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
    """Call ``can_initialize`` for gpt-oss-20b with the MXFP4/MXFP8 MoE flag set.

    ``VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8`` is set to ``"1"`` through the
    ``monkeypatch`` fixture before the call.
    """
    # NOTE(review): can_initialize and HF_OVERRIDE_TEXT are defined outside
    # this view; presumably the helper checks that the model starts up with
    # the given settings -- confirm against its definition.
    monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8", "1")

    model_id = "openai/gpt-oss-20b"
    can_initialize(model_id, hf_overrides=HF_OVERRIDE_TEXT)
def test_gptoss_dp2_mxfp4mxfp8_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
    """Call ``can_initialize`` for gpt-oss-20b with DP/EP flags and MXFP4/MXFP8.

    Before the call, ``VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8`` is set to ``"1"``
    and ``VLLM_ALL2ALL_BACKEND`` to ``"deepep_high_throughput"``. The helper
    is given ``--data-parallel-size 2 --enable-expert-parallel`` as extra
    arguments.
    """
    # NOTE(review): can_initialize and HF_OVERRIDE_TEXT are defined outside
    # this view; presumably the helper checks that the model starts up with
    # the given settings -- confirm against its definition.
    env_overrides = {
        "VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8": "1",
        "VLLM_ALL2ALL_BACKEND": "deepep_high_throughput",
    }
    # Dicts preserve insertion order, so the variables are set in the order
    # listed above.
    for var_name, var_value in env_overrides.items():
        monkeypatch.setenv(var_name, var_value)

    dp_ep_args = ["--data-parallel-size", "2", "--enable-expert-parallel"]
    can_initialize(
        "openai/gpt-oss-20b",
        extra_args=dp_ep_args,
        hf_overrides=HF_OVERRIDE_TEXT,
    )
def test_gptoss_dp2_mxfp4bf16_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
    """Call ``can_initialize`` for gpt-oss-20b with DP/EP flags and MXFP4/BF16.

    Before the call, ``VLLM_USE_FLASHINFER_MOE_MXFP4_BF16`` is set to ``"1"``
    and ``VLLM_ALL2ALL_BACKEND`` to ``"deepep_high_throughput"``. The helper
    is given ``--data-parallel-size 2 --enable-expert-parallel`` as extra
    arguments.
    """
    # NOTE(review): can_initialize and HF_OVERRIDE_TEXT are defined outside
    # this view; presumably the helper checks that the model starts up with
    # the given settings -- confirm against its definition.
    env_overrides = {
        "VLLM_USE_FLASHINFER_MOE_MXFP4_BF16": "1",
        "VLLM_ALL2ALL_BACKEND": "deepep_high_throughput",
    }
    # Dicts preserve insertion order, so the variables are set in the order
    # listed above.
    for var_name, var_value in env_overrides.items():
        monkeypatch.setenv(var_name, var_value)

    dp_ep_args = ["--data-parallel-size", "2", "--enable-expert-parallel"]
    can_initialize(
        "openai/gpt-oss-20b",
        extra_args=dp_ep_args,
        hf_overrides=HF_OVERRIDE_TEXT,
    )