[Feat] Refactor for parallel_config in FusedMoEModularKernel (#30282)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
@@ -10,10 +10,12 @@ from typing import final
 import torch
 
 import vllm.envs as envs
-from vllm.config import ParallelConfig, get_current_vllm_config
 from vllm.forward_context import get_forward_context, is_forward_context_available
 from vllm.logger import init_logger
-from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
+from vllm.model_executor.layers.fused_moe.config import (
+    FusedMoEParallelConfig,
+    FusedMoEQuantConfig,
+)
 from vllm.model_executor.layers.fused_moe.utils import (
     _resize_cache,
     count_expert_num_tokens,
@@ -681,7 +683,7 @@ class FusedMoEModularKernel(torch.nn.Module):
         fused_experts: FusedMoEPermuteExpertsUnpermute,
         shared_experts: torch.nn.Module | None = None,
         shared_experts_stream: torch.cuda.Stream | None = None,
-        parallel_config: ParallelConfig | None = None,
+        moe_parallel_config: FusedMoEParallelConfig | None = None,
     ):
         super().__init__()
         self.prepare_finalize = prepare_finalize
@@ -689,12 +691,15 @@ class FusedMoEModularKernel(torch.nn.Module):
         self.shared_experts = shared_experts
         self.shared_experts_stream = shared_experts_stream
 
-        # cache whether this worker is using DP+EP
-        if parallel_config is None:
-            parallel_config = get_current_vllm_config().parallel_config
+        # prefer an explicit FusedMoEParallelConfig when available (from
+        # FusedMoE layers / tests).
+        # if not provided, assume this kernel is
+        # running in a non-DP+EP context
+        self.moe_parallel_config: FusedMoEParallelConfig | None = moe_parallel_config
         self.is_dp_ep = (
-            parallel_config.data_parallel_size > 1
-            and parallel_config.enable_expert_parallel
+            moe_parallel_config is not None
+            and moe_parallel_config.dp_size > 1
+            and moe_parallel_config.use_ep
        )
 
         self._post_init_setup()
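For context, below is a minimal, hypothetical usage sketch (not code from this commit) of how a caller might pass the new moe_parallel_config argument. The build_kernel helper is illustrative, the module path for FusedMoEModularKernel is assumed to be vllm/model_executor/layers/fused_moe/modular_kernel.py, and prepare_finalize / fused_experts stand in for already-built FusedMoEPrepareAndFinalize and FusedMoEPermuteExpertsUnpermute implementations.

# Hypothetical sketch, not part of this commit.
from vllm.model_executor.layers.fused_moe.config import FusedMoEParallelConfig
from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel


def build_kernel(prepare_finalize, fused_experts,
                 moe_parallel_config: FusedMoEParallelConfig | None = None):
    # The FusedMoE layer (or a test) passes its own FusedMoEParallelConfig
    # instead of the kernel reading get_current_vllm_config().parallel_config.
    kernel = FusedMoEModularKernel(
        prepare_finalize,
        fused_experts,
        moe_parallel_config=moe_parallel_config,
    )
    # After this commit, is_dp_ep is derived purely from moe_parallel_config:
    # it is True only when a config is given with dp_size > 1 and use_ep set.
    return kernel

The effect of the change is that the kernel no longer consults the global vLLM config at construction time; callers either supply an explicit FusedMoEParallelConfig or omit it, in which case the kernel assumes a non-DP+EP context (is_dp_ep is False).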