[Kernels] Isolate modular kernel code from FusedMoEMethodBase subclasses. (#27123)

This commit is contained in:
bnellnm
2025-11-04 08:59:45 -05:00
committed by GitHub
parent e4ee658672
commit 938772af03
16 changed files with 271 additions and 311 deletions

View File

@@ -518,12 +518,11 @@ class BitsAndBytesMoEMethod(FusedMoEMethodBase):
) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
from vllm.model_executor.layers.fused_moe import fused_experts
assert self.fused_experts is None
if enable_eplb:
raise NotImplementedError(
"EPLB not supported for `BitsAndBytesMoEMethod` yet."
)
topk_weights, topk_ids, _ = FusedMoE.select_experts(
hidden_states=x,
router_logits=router_logits,