[Bugfix] Fix gpt-oss w4a8 DP/EP on B200 (#26729)
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
committed by
GitHub
parent
f95da13c3d
commit
5ff5d94e77
@@ -170,3 +170,23 @@ def test_gptoss_mxfp4mxfp8_moe_flashinfer_cutlass(monkeypatch: pytest.MonkeyPatc
|
||||
def test_gptoss_mxfp4mxfp8_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
    """Run the gpt-oss-20b initialization check with the MXFP4/MXFP8 flag set.

    Sets ``VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8=1`` for the duration of the
    test and then hands the model to ``can_initialize``.
    """
    model_id = "openai/gpt-oss-20b"

    # The flag must be in the environment before the initialization check runs.
    monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8", "1")

    can_initialize(model_id, hf_overrides=HF_OVERRIDE_TEXT)
|
||||
|
||||
|
||||
def test_gptoss_dp2_mxfp4mxfp8_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
    """Run the gpt-oss-20b initialization check with MXFP4/MXFP8 under DP=2 + EP.

    Enables ``VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8``, selects the
    ``deepep_high_throughput`` all2all backend, and passes
    ``--data-parallel-size 2 --enable-expert-parallel`` to ``can_initialize``.
    """
    # Applied in insertion order, matching the sequence of the setenv calls.
    env_overrides = {
        "VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8": "1",
        "VLLM_ALL2ALL_BACKEND": "deepep_high_throughput",
    }
    for env_name, env_value in env_overrides.items():
        monkeypatch.setenv(env_name, env_value)

    dp_ep_args = ["--data-parallel-size", "2", "--enable-expert-parallel"]
    can_initialize(
        "openai/gpt-oss-20b",
        extra_args=dp_ep_args,
        hf_overrides=HF_OVERRIDE_TEXT,
    )
|
||||
|
||||
|
||||
def test_gptoss_dp2_mxfp4bf16_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
    """Run the gpt-oss-20b initialization check with MXFP4/BF16 under DP=2 + EP.

    Enables ``VLLM_USE_FLASHINFER_MOE_MXFP4_BF16``, selects the
    ``deepep_high_throughput`` all2all backend, and passes
    ``--data-parallel-size 2 --enable-expert-parallel`` to ``can_initialize``.
    """
    env_settings = (
        ("VLLM_USE_FLASHINFER_MOE_MXFP4_BF16", "1"),
        ("VLLM_ALL2ALL_BACKEND", "deepep_high_throughput"),
    )
    for key, setting in env_settings:
        monkeypatch.setenv(key, setting)

    # Keyword arguments forwarded unchanged to the initialization helper.
    init_kwargs = {
        "extra_args": ["--data-parallel-size", "2", "--enable-expert-parallel"],
        "hf_overrides": HF_OVERRIDE_TEXT,
    }
    can_initialize("openai/gpt-oss-20b", **init_kwargs)
|
||||
|
||||
Reference in New Issue
Block a user