Optimize moe_align_block_size for deepseek_v3 (#12850)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
@@ -596,7 +596,7 @@ def moe_align_block_size(
|
||||
dtype=torch.int32,
|
||||
device=topk_ids.device)
|
||||
if num_experts >= 224:
|
||||
- if envs.VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON:
|
||||
+ if envs.VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON or num_experts != 256:
|
||||
moe_align_block_size_triton(
|
||||
topk_ids,
|
||||
num_experts,
|
||||
@@ -606,6 +606,7 @@ def moe_align_block_size(
|
||||
num_tokens_post_pad,
|
||||
)
|
||||
else:
|
||||
+ # Currently requires num_experts=256
|
||||
ops.sgl_moe_align_block_size(
|
||||
topk_ids,
|
||||
num_experts,
|
||||
|
||||
Reference in New Issue
Block a user