[MoE Refactor] Mxfp4 oracle rebased (#37128)

Signed-off-by: Yongye Zhu <zyy1102000@gmail.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Yongye Zhu
2026-03-20 22:37:04 -05:00
committed by GitHub
parent c7f98b4d0a
commit 87bd91892f
18 changed files with 1707 additions and 1381 deletions

View File

@@ -84,7 +84,10 @@ def run_e2e_fusion_test(monkeypatch, caplog_mp_spawn):
# TODO: remove this after finishing migration from envs to model kwargs
if model_name == "openai/gpt-oss-20b":
monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8", "1")
from .common import is_blackwell
if is_blackwell():
monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8", "1")
# Disable compile cache to make sure custom passes run.
# Otherwise, we can't verify fusion happened through the logs.