[Refactor] EPLB rebalance algo to NumPy (#30697)

Signed-off-by: ilmarkov <markovilya197@gmail.com>
This commit is contained in:
Ilya Markov
2026-01-12 19:13:23 +01:00
committed by GitHub
parent 3d962d72ab
commit 1eb61ab34b
3 changed files with 126 additions and 128 deletions

View File

@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import numpy as np
import pytest
import torch
@@ -312,9 +313,9 @@ if __name__ == "__main__":
test_basic_rebalance()
def _make_phy_replicas_idx_from_phy2log(phy2log: torch.Tensor) -> torch.Tensor:
"""Create replicas indices mapping from phy2log"""
pr = torch.zeros_like(phy2log)
def _make_phy_replicas_idx_from_phy2log(phy2log: np.ndarray) -> np.ndarray:
"""Create replicas indices mapping from phy2log."""
pr = np.zeros_like(phy2log, dtype=np.int64)
for layer in range(phy2log.shape[0]):
seen: dict[int, int] = {}
row = phy2log[layer].tolist()
@@ -326,11 +327,11 @@ def _make_phy_replicas_idx_from_phy2log(phy2log: torch.Tensor) -> torch.Tensor:
def _validate_intragpu_rearrangement(
old_global_expert_indices: torch.Tensor,
new_phy2log: torch.Tensor,
new_phy_replicas_idx: torch.Tensor,
post_phy2log: torch.Tensor,
post_phy_replicas_idx: torch.Tensor,
old_global_expert_indices: np.ndarray,
new_phy2log: np.ndarray,
new_phy_replicas_idx: np.ndarray,
post_phy2log: np.ndarray,
post_phy_replicas_idx: np.ndarray,
num_ranks: int,
slots_per_gpu: int,
):
@@ -345,7 +346,7 @@ def _validate_intragpu_rearrangement(
post_rnk = post_phy_replicas_idx[0, start:end]
# Pairwise equality for (expert, rank) pairs to ensure nothing is lost
def sorted_pairs(seg: torch.Tensor, rnk: torch.Tensor):
def sorted_pairs(seg, rnk):
pairs = list(zip(seg.tolist(), rnk.tolist()))
pairs.sort()
return pairs
@@ -386,8 +387,8 @@ def _validate_intragpu_rearrangement(
# GPU0 new -> [1,5,0,4]; GPU1 new -> [6,2,7,3]
2,
4,
torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7]]),
torch.tensor([[1, 5, 0, 4, 6, 2, 7, 3]]),
np.array([[0, 1, 2, 3, 4, 5, 6, 7]]),
np.array([[1, 5, 0, 4, 6, 2, 7, 3]]),
id="simple",
),
pytest.param(
@@ -401,8 +402,8 @@ def _validate_intragpu_rearrangement(
# GPU1 new -> [6, 2, 3, 2, 1] (expert 2 duplicated)
2,
5,
torch.tensor([[0, 1, 0, 2, 3, 4, 5, 6, 1, 2]]),
torch.tensor([[0, 5, 4, 0, 1, 6, 2, 3, 2, 1]]),
np.array([[0, 1, 0, 2, 3, 4, 5, 6, 1, 2]]),
np.array([[0, 5, 4, 0, 1, 6, 2, 3, 2, 1]]),
id="duplicates",
),
pytest.param(
@@ -418,8 +419,8 @@ def _validate_intragpu_rearrangement(
# GPU2 new -> [1, 2, 3, 0]
3,
4,
torch.tensor([[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]]),
torch.tensor([[0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0]]),
np.array([[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]]),
np.array([[0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0]]),
id="skewed_expert",
),
],