[Misc][Refactor] Add FusedMoERouter object (#30519)

Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
bnellnm
2026-01-08 15:52:55 -05:00
committed by GitHub
parent aa125ecf0e
commit e74698c27a
20 changed files with 165 additions and 36 deletions

View File

@@ -15,6 +15,7 @@ from vllm.model_executor.layers.fused_moe.config import (
FusedMoEQuantConfig,
)
from vllm.model_executor.layers.fused_moe.fused_marlin_moe import fused_marlin_moe
+from vllm.model_executor.layers.fused_moe.fused_moe_router import FusedMoERouter
from vllm.model_executor.layers.fused_moe.layer import (
FusedMoE,
FusedMoEMethodBase,
@@ -895,12 +896,13 @@ class GPTQMarlinMoEMethod(FusedMoEMethodBase):
def apply(
self,
layer: FusedMoE,
+router: FusedMoERouter,
x: torch.Tensor,
router_logits: torch.Tensor,
) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
assert layer.activation == "silu", "Only SiLU activation is supported."
-topk_weights, topk_ids = layer.select_experts(
+topk_weights, topk_ids = router.select_experts(
hidden_states=x,
router_logits=router_logits,
)