[MoE Refactor] Make SharedExperts class for use with DefaultMoERunner (#35153)
Signed-off-by: Bill Nell <bnell@redhat.com>
@@ -94,6 +94,8 @@ def transformers_moe_forward(
    self = forward_context.no_compile_layers[layer_name]
    self._topk_ids = topk_ids
    # Clone hidden_states because it will be mutated in-place in FusedMoE.
    # TODO(bnell): figure out a way to avoid calling the runner directly.
    # It is a hack that the weights are being passed via logits.
    return self.runner.forward(hidden_states.clone(), topk_weights)
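The clone-before-call pattern above exists because the fused MoE path mutates its input buffer. Below is a minimal, hypothetical sketch of why the caller must pass a clone; the InPlaceRunner class is illustrative only and not vLLM's actual runner API:

    import torch

    # Hypothetical stand-in for a FusedMoE-style runner that, like many
    # fused kernels, writes its result back into the input buffer.
    class InPlaceRunner:
        def forward(self, hidden_states: torch.Tensor,
                    topk_weights: torch.Tensor) -> torch.Tensor:
            # Scale each token's hidden state in place by the sum of its
            # routing weights (an arbitrary mutation for demonstration).
            hidden_states.mul_(topk_weights.sum(dim=-1, keepdim=True))
            return hidden_states

    runner = InPlaceRunner()
    hidden_states = torch.randn(4, 8)  # (num_tokens, hidden_size)
    topk_weights = torch.rand(4, 2)    # (num_tokens, top_k)

    saved = hidden_states.clone()
    # Passing a clone keeps the caller's tensor intact even though the
    # runner writes into whatever buffer it receives.
    out = runner.forward(hidden_states.clone(), topk_weights)
    assert torch.equal(hidden_states, saved)  # caller's buffer unchanged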