diff --git a/tests/kernels/moe/utils.py b/tests/kernels/moe/utils.py
index 6cf01ac47..ef72b96be 100644
--- a/tests/kernels/moe/utils.py
+++ b/tests/kernels/moe/utils.py
@@ -7,11 +7,6 @@ import vllm._custom_ops as ops
 from tests.kernels.quant_utils import per_block_cast_to_int8
 from tests.kernels.quantization.nvfp4_utils import FLOAT4_E2M1_MAX, FLOAT8_E4M3_MAX
 from vllm.model_executor.layers.activation import SiluAndMul
-from vllm.model_executor.layers.fused_moe import (
-    TritonExperts,
-    fused_experts,
-    fused_topk,
-)
 from vllm.model_executor.layers.fused_moe.activation import MoEActivation
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEConfig,
@@ -24,10 +19,15 @@ from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
     BatchedTritonExperts,
     NaiveBatchedExperts,
 )
+from vllm.model_executor.layers.fused_moe.fused_moe import (
+    TritonExperts,
+    fused_experts,
+)
 from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel
 from vllm.model_executor.layers.fused_moe.prepare_finalize import (
     MoEPrepareAndFinalizeNoEP,
 )
+from vllm.model_executor.layers.fused_moe.router.fused_topk_router import fused_topk
 from vllm.model_executor.layers.fused_moe.utils import moe_kernel_quantize_input
 from vllm.utils.deep_gemm import per_block_cast_to_fp8
 from vllm.utils.math_utils import round_up
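
For reference, the net effect of this hunk is that the three moved symbols are now imported from their concrete modules rather than from the `fused_moe` package `__init__`. The sketch below only restates the import paths shown in the diff (the comment about `fused_topk`'s call shape is an assumption about its long-standing signature, not something this patch changes):

```python
# Import layout after this change, as shown in the diff above.
from vllm.model_executor.layers.fused_moe.fused_moe import (
    TritonExperts,
    fused_experts,
)
from vllm.model_executor.layers.fused_moe.router.fused_topk_router import fused_topk

# Callers that previously did
#   from vllm.model_executor.layers.fused_moe import TritonExperts, fused_experts, fused_topk
# need the same per-module update applied here to tests/kernels/moe/utils.py.
# fused_topk(hidden_states, gating_output, topk, renormalize) is expected to keep
# returning (topk_weights, topk_ids); only its import path moves.
```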