[MoE Refactor] Introduce MoERunner abstraction and move execution logic from FusedMoE to DefaultMoERunner (#32344)

Signed-off-by: Bill Nell <bnell@redhat.com>
2026-02-10 19:51:07 -05:00
parent dc6de33c3d
commit d1481ba783
25 changed files with 913 additions and 753 deletions
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -900,6 +900,7 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
        x: torch.Tensor,
        topk_weights: torch.Tensor,
        topk_ids: torch.Tensor,
+        shared_experts_input: torch.Tensor | None,
    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
        assert not self.is_monolithic
        if layer.enable_eplb: