[Kernels] Isolate modular kernel code from FusedMoEMethodBase subclasses. (#27123)
@@ -13,7 +13,7 @@ import vllm.envs as envs
 from vllm.distributed.parallel_state import get_dp_group
 from vllm.model_executor.layers.fused_moe.deep_gemm_moe import DeepGemmExperts
 from vllm.model_executor.layers.fused_moe.deep_gemm_utils import compute_aligned_M
-from vllm.model_executor.layers.fused_moe.layer import FusedMoE
+from vllm.model_executor.layers.fused_moe.layer import FusedMoE, FusedMoEModularMethod
 from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel
 from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import (
     TritonOrDeepGemmExperts,
@@ -160,8 +160,8 @@ def _fused_moe_grouped_gemm_may_use_deep_gemm(module: torch.nn.Module) -> bool:
     ):
         return False
 
-    if not isinstance(module.quant_method.fused_experts, FusedMoEModularKernel):
-        # fused_experts could invoke deep_gemm_moe_fp8
+    if not isinstance(module.quant_method, FusedMoEModularMethod):
+        # modular kernels could invoke deep_gemm_moe_fp8
         return True
 
     mk: FusedMoEModularKernel = module.quant_method.fused_experts
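
For context, a minimal sketch of the intent behind the revised check, assuming (as the surrounding imports suggest) that the rest of the function inspects the modular kernel's experts implementation. The helper name may_use_deep_gemm and the exact pair of expert classes tested are illustrative assumptions, not the actual function body:

# Illustrative sketch only; not the real _fused_moe_grouped_gemm_may_use_deep_gemm.
# Assumption: after #27123, a quant method that runs a modular kernel is wrapped
# in FusedMoEModularMethod, which exposes the kernel as its fused_experts.
from vllm.model_executor.layers.fused_moe.deep_gemm_moe import DeepGemmExperts
from vllm.model_executor.layers.fused_moe.layer import FusedMoEModularMethod
from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel
from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import (
    TritonOrDeepGemmExperts,
)


def may_use_deep_gemm(quant_method) -> bool:
    if not isinstance(quant_method, FusedMoEModularMethod):
        # Non-modular quant methods may still reach deep_gemm_moe_fp8 through
        # other code paths, so conservatively answer True.
        return True

    # On the modular path, look at the kernel owned by the method rather than
    # probing quant_method.fused_experts for a FusedMoEModularKernel type.
    mk: FusedMoEModularKernel = quant_method.fused_experts
    # Assumption: the experts implementation is what determines whether a
    # DeepGEMM grouped GEMM can be dispatched.
    return isinstance(mk.fused_experts, (DeepGemmExperts, TritonOrDeepGemmExperts))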