[MoE Refactor] Migrate Unquantized to Full Oracle Flow (#36286)

Signed-off-by: Yifan Zong <yzong@redhat.com>
Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: yzong-rh <yzong@redhat.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
This commit is contained in:
yzong-rh
2026-03-31 15:43:33 -04:00
committed by GitHub
parent 598190aac3
commit d9b90a07ac
11 changed files with 618 additions and 514 deletions

View File

@@ -210,6 +210,13 @@ def test_gptoss_eager(monkeypatch: pytest.MonkeyPatch):
## Qwen3 Next ##
@pytest.mark.skip(
reason=(
"FLASHINFER TRTLLM MoE has a bug with all negative router logits "
"for models with RENORMALIZE. This will be re-enabled once the "
"issue is fixed in flashinfer."
)
)
def test_qwen3_next_bf16_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
can_initialize(
"Qwen/Qwen3-Next-80B-A3B-Instruct",