diff --git a/tests/kernels/moe/test_cutedsl_moe.py b/tests/kernels/moe/test_cutedsl_moe.py index 66a97b48b..bca3eba0f 100644 --- a/tests/kernels/moe/test_cutedsl_moe.py +++ b/tests/kernels/moe/test_cutedsl_moe.py @@ -17,7 +17,7 @@ from flashinfer import fp4_quantize from torch.nn import functional as F from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.fused_moe.flashinfer_cutedsl_moe import ( +from vllm.model_executor.layers.fused_moe.experts.flashinfer_cutedsl_moe import ( flashinfer_cutedsl_moe_masked, ) from vllm.utils.flashinfer import ( diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_cutedsl_moe.py b/vllm/model_executor/layers/fused_moe/experts/flashinfer_cutedsl_moe.py similarity index 100% rename from vllm/model_executor/layers/fused_moe/flashinfer_cutedsl_moe.py rename to vllm/model_executor/layers/fused_moe/experts/flashinfer_cutedsl_moe.py diff --git a/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py b/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py index 031aca388..35451e87d 100644 --- a/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py +++ b/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py @@ -86,7 +86,7 @@ def backend_to_kernel_cls( return [FlashInferExperts] elif backend == NvFp4MoeBackend.FLASHINFER_CUTEDSL: - from vllm.model_executor.layers.fused_moe.flashinfer_cutedsl_moe import ( + from vllm.model_executor.layers.fused_moe.experts.flashinfer_cutedsl_moe import ( # noqa: E501 FlashInferCuteDSLExperts, )