Change the type signature of MixtureOfExperts.expert_weights to MutableSequence[Sequence[Tensor]] (#33573)

Signed-off-by: Sage Moore <sagmoore@redhat.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
This commit is contained in:
Sage Moore
2026-02-04 14:02:46 -08:00
committed by GitHub
parent 9f14c9224d
commit ce498a6d61
2 changed files with 18 additions and 9 deletions

View File

@@ -6,7 +6,7 @@ The actual execution of the rearrangement.
This involves the exchange of expert weights between GPUs.
"""
-from collections.abc import Iterable, Sequence
+from collections.abc import Sequence
from dataclasses import dataclass
import numpy as np
@@ -153,7 +153,7 @@ def move_to_buffer(
num_local_experts: int,
old_indices: np.ndarray,
new_indices: np.ndarray,
-    expert_weights: Iterable[torch.Tensor],
+    expert_weights: Sequence[torch.Tensor],
expert_weights_buffers: Sequence[torch.Tensor],
cuda_stream: torch.cuda.Stream | None,
ep_group: ProcessGroup,
@@ -355,7 +355,7 @@ def move_to_buffer(
def move_from_buffer(
-    expert_weights: Iterable[torch.Tensor],
+    expert_weights: Sequence[torch.Tensor],
expert_weights_buffers: list[torch.Tensor],
is_unchanged: np.ndarray,
is_received_locally: np.ndarray,
@@ -436,7 +436,7 @@ def move_from_buffer(
async def transfer_layer(
old_global_expert_indices: torch.Tensor,
new_global_expert_indices: torch.Tensor,
-    expert_weights: Sequence[Iterable[torch.Tensor]],
+    expert_weights: Sequence[Sequence[torch.Tensor]],
expert_weights_buffer: Sequence[torch.Tensor],
ep_group: ProcessGroup,
is_profile: bool = False,
@@ -488,7 +488,8 @@ async def transfer_layer(
assert old_global_expert_indices.shape[1] == new_global_expert_indices.shape[1]
num_moe_layers, num_physical_experts = old_global_expert_indices.shape
assert len(expert_weights) == num_moe_layers
-    num_local_physical_experts = next(iter(expert_weights[0])).shape[0]
+    assert len(expert_weights[0]) >= 1
+    num_local_physical_experts = expert_weights[0][0].shape[0]
assert new_global_expert_indices.shape == (num_moe_layers, num_physical_experts)
assert num_physical_experts == ep_size * num_local_physical_experts
@@ -510,7 +511,7 @@ async def transfer_layer(
def rearrange_expert_weights_inplace(
old_global_expert_indices: torch.Tensor,
new_global_expert_indices: torch.Tensor,
-    expert_weights: Sequence[Iterable[torch.Tensor]],
+    expert_weights: Sequence[Sequence[torch.Tensor]],
ep_group: ProcessGroup,
is_profile: bool = False,
rank_mapping: dict[int, int] | None = None,
@@ -553,8 +554,9 @@ def rearrange_expert_weights_inplace(
num_moe_layers, num_physical_experts = old_global_expert_indices.shape
assert len(expert_weights) == num_moe_layers
+    assert len(expert_weights[0]) >= 1
-    num_local_physical_experts = next(iter(expert_weights[0])).shape[0]
+    num_local_physical_experts = expert_weights[0][0].shape[0]
assert new_global_expert_indices.shape == (num_moe_layers, num_physical_experts)
ep_size = ep_group.size()

View File

@@ -2,7 +2,14 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio
-from collections.abc import AsyncGenerator, Callable, Iterable, Mapping, MutableSequence
+from collections.abc import (
+    AsyncGenerator,
+    Callable,
+    Iterable,
+    Mapping,
+    MutableSequence,
+    Sequence,
+)
from contextlib import ExitStack, contextmanager, nullcontext
from typing import (
TYPE_CHECKING,
@@ -818,7 +825,7 @@ class MixtureOfExperts(Protocol):
Check if the model is a mixture of experts (MoE) model.
"""
-    expert_weights: MutableSequence[Iterable[Tensor]]
+    expert_weights: MutableSequence[Sequence[Tensor]]
"""
Expert weights saved in this rank.