Optimize moe_align_block_size for deepseek_v3 (#12850)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-02-13 18:43:37 -05:00
committed by GitHub
parent bffddd9a05
commit 2344192a55
2 changed files with 38 additions and 15 deletions

View File

@@ -596,7 +596,7 @@ def moe_align_block_size(
dtype=torch.int32,
device=topk_ids.device)
if num_experts >= 224:
-if envs.VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON:
+if envs.VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON or num_experts != 256:
moe_align_block_size_triton(
topk_ids,
num_experts,
@@ -606,6 +606,7 @@ def moe_align_block_size(
num_tokens_post_pad,
)
else:
+# Currently requires num_experts=256
ops.sgl_moe_align_block_size(
topk_ids,
num_experts,