[Kernels] Overlap shared experts with send/recv (#23273)

Signed-off-by: Bill Nell <bnell@redhat.com>
2025-09-03 12:35:18 -04:00
parent fa4311d85f
commit e9b92dcd89
32 changed files with 885 additions and 227 deletions
--- a/vllm/distributed/device_communicators/all2all.py
+++ b/vllm/distributed/device_communicators/all2all.py
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import TYPE_CHECKING, Any
+from typing import Any

 import torch
 import torch.distributed as dist
@@ -13,11 +13,6 @@ from .base_device_communicator import All2AllManagerBase, Cache

 logger = init_logger(__name__)

-if TYPE_CHECKING:
-    from vllm.model_executor.layers.fused_moe.layer import FusedMoE
-else:
-    FusedMoE = None
-

 class NaiveAll2AllManager(All2AllManagerBase):
    """
--- a/vllm/distributed/device_communicators/base_device_communicator.py
+++ b/vllm/distributed/device_communicators/base_device_communicator.py
@@ -252,7 +252,10 @@ class DeviceCommunicatorBase:

        moe_modules = [
            module for module in model.modules()
-            if module.__class__.__name__ == "FusedMoE"
+            # TODO(bnell): Should use isinstance, but can't here. Maybe check
+            # for the presence of quant_method.init_prepare_finalize instead?
+            if (module.__class__.__name__ == "FusedMoE"
+                or module.__class__.__name__ == "SharedFusedMoE")
        ]
        for module in moe_modules:
            module.quant_method.init_prepare_finalize(module)