[MoE Refactor] Migrate Unquantized to Full Oracle Flow (#36286)
Signed-off-by: Yifan Zong <yzong@redhat.com>
Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: yzong-rh <yzong@redhat.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
This commit is contained in:
@@ -210,6 +210,13 @@ def test_gptoss_eager(monkeypatch: pytest.MonkeyPatch):
|
||||
## Qwen3 Next ##
|
||||
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason=(
|
||||
"FLASHINFER TRTLLM MoE has a bug with all negative router logits "
|
||||
"for models with RENORMALIZE. This will be re-enabled once the "
|
||||
"issue is fixed in flashinfer."
|
||||
)
|
||||
)
|
||||
def test_qwen3_next_bf16_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
|
||||
can_initialize(
|
||||
"Qwen/Qwen3-Next-80B-A3B-Instruct",
|
||||
|
||||
Reference in New Issue
Block a user