[MoE Refactor] Separate Router into OO Classes (#30623)
Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
@@ -14,6 +14,7 @@ from vllm.model_executor.layers.fused_moe import (
|
||||
FusedMoE,
|
||||
FusedMoEConfig,
|
||||
FusedMoEMethodBase,
|
||||
FusedMoERouter,
|
||||
)
|
||||
from vllm.model_executor.layers.fused_moe import modular_kernel as mk
|
||||
from vllm.model_executor.layers.fused_moe.config import (
|
||||
@@ -27,7 +28,6 @@ from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
|
||||
MarlinExperts,
|
||||
fused_marlin_moe,
|
||||
)
|
||||
from vllm.model_executor.layers.fused_moe.fused_moe_router import FusedMoERouter
|
||||
from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (
|
||||
OAITritonExperts,
|
||||
UnfusedOAITritonExperts,
|
||||
@@ -936,9 +936,9 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
|
||||
layer.apply_router_weight_on_input,
|
||||
layer.scoring_func,
|
||||
layer.activation,
|
||||
layer.expert_load_view,
|
||||
layer.logical_to_physical_map,
|
||||
layer.logical_replica_count,
|
||||
layer.eplb_state.expert_load_view,
|
||||
layer.eplb_state.logical_to_physical_map,
|
||||
layer.eplb_state.logical_replica_count,
|
||||
), "MXFP4 are not supported with this configuration."
|
||||
|
||||
if (
|
||||
|
||||
Reference in New Issue
Block a user