[Kernels] Isolate modular kernel code from FusedMoEMethodBase subclasses. (#27123)

2025-11-04 08:59:45 -05:00
parent e4ee658672
commit 938772af03
16 changed files with 271 additions and 311 deletions
--- a/vllm/model_executor/layers/quantization/bitsandbytes.py
+++ b/vllm/model_executor/layers/quantization/bitsandbytes.py
@@ -518,12 +518,11 @@ class BitsAndBytesMoEMethod(FusedMoEMethodBase):
    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
        from vllm.model_executor.layers.fused_moe import fused_experts

-        assert self.fused_experts is None
-
        if enable_eplb:
            raise NotImplementedError(
                "EPLB not supported for `BitsAndBytesMoEMethod` yet."
            )
+
        topk_weights, topk_ids, _ = FusedMoE.select_experts(
            hidden_states=x,
            router_logits=router_logits,