[MoE] Move DEEP_GEMM into experts/ subdirectory (#39005)

Signed-off-by: Jackmin801 <ongjackm@gmail.com>
Signed-off-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Jackmin801
2026-04-08 12:23:08 -07:00
committed by GitHub
parent 8477fe427d
commit a776a48b1c
14 changed files with 24 additions and 20 deletions
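Every hunk below rewrites imports of the DeepGEMM expert backends from vllm.model_executor.layers.fused_moe.<module> to the new vllm.model_executor.layers.fused_moe.experts.<module> location. As a quick reference, a minimal sketch of what downstream imports look like after this change (assuming no compatibility re-export is kept at the old paths):

# Sketch only: new import locations after the move to experts/.
# Assumes the old fused_moe.batched_deep_gemm_moe and fused_moe.deep_gemm_moe
# paths no longer resolve once this commit lands.
from vllm.model_executor.layers.fused_moe.experts.batched_deep_gemm_moe import (
    BatchedDeepGemmExperts,
)
from vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe import DeepGemmExperts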

@@ -7,14 +7,14 @@ import torch
 # Fused experts and PrepareFinalize imports
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm.model_executor.layers.fused_moe import TritonExperts
-from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
-    BatchedDeepGemmExperts,
-)
 from vllm.model_executor.layers.fused_moe.config import (
     FusedMoEConfig,
     FusedMoEQuantConfig,
 )
-from vllm.model_executor.layers.fused_moe.deep_gemm_moe import DeepGemmExperts
+from vllm.model_executor.layers.fused_moe.experts.batched_deep_gemm_moe import (
+    BatchedDeepGemmExperts,
+)
+from vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe import DeepGemmExperts
 from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
     BatchedTritonExperts,
     NaiveBatchedExperts,

@@ -5,10 +5,10 @@ import pytest
 import torch
 
 from vllm.model_executor.layers.fused_moe.activation import MoEActivation
-from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
+from vllm.model_executor.layers.fused_moe.config import fp8_w8a8_moe_quant_config
+from vllm.model_executor.layers.fused_moe.experts.batched_deep_gemm_moe import (
     BatchedDeepGemmExperts,
 )
-from vllm.model_executor.layers.fused_moe.config import fp8_w8a8_moe_quant_config
 from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
     BatchedPrepareAndFinalize,
     BatchedTritonExperts,

@@ -28,7 +28,7 @@ from vllm.model_executor.layers.fused_moe.all2all_utils import (
 from vllm.model_executor.layers.fused_moe.config import (
     fp8_w8a8_moe_quant_config,
 )
-from vllm.model_executor.layers.fused_moe.deep_gemm_moe import (
+from vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe import (
     _valid_deep_gemm_shape,
 )
 from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import (

@@ -47,10 +47,12 @@ if has_deep_ep():
     from .parallel_utils import DeepEPHTArgs, DeepEPLLArgs, make_deepep_a2a
 
 if has_deep_gemm():
-    from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
+    from vllm.model_executor.layers.fused_moe.experts.batched_deep_gemm_moe import (
         BatchedDeepGemmExperts,
     )
-    from vllm.model_executor.layers.fused_moe.deep_gemm_moe import DeepGemmExperts
+    from vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe import (
+        DeepGemmExperts,
+    )
 
 requires_deep_ep = pytest.mark.skipif(
     not has_deep_ep(),

@@ -175,7 +175,7 @@ def test_deepgemm_vs_triton(m, n, k, topk, num_experts, monkeypatch, workspace_i
         mp.setenv("VLLM_USE_DEEP_GEMM", "1")
 
         _DeepGemmExperts = importlib.import_module(
-            "vllm.model_executor.layers.fused_moe.deep_gemm_moe"
+            "vllm.model_executor.layers.fused_moe.experts.deep_gemm_moe"
         ).DeepGemmExperts
 
         call_counter = {"cnt": 0}

@@ -7,7 +7,7 @@ import random
 import pytest
 import torch
 
-from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
+from vllm.model_executor.layers.fused_moe.experts.batched_deep_gemm_moe import (
     persistent_masked_m_silu_mul_quant,
 )
 from vllm.model_executor.layers.quantization.utils.quant_utils import (