Use `max_active_experts` for MoE LoRA input size (#33197)

Signed-off-by: gnovack <gnovack@amazon.com>
This commit is contained in:
gnovack
2026-02-26 19:50:43 -08:00
committed by GitHub
parent 1e5ad9b74f
commit a532c83849
2 changed files with 4 additions and 0 deletions

View File

@@ -47,6 +47,8 @@ def test_moe_lora_align_block_size(
# compute paddings
max_num_tokens_padded = topk_ids.numel() + num_experts * (block_size - 1)
max_num_tokens_padded = round_up(max_num_tokens_padded, block_size)
if topk_ids.numel() < num_experts:
max_num_tokens_padded = topk_ids.numel() * block_size
max_num_m_blocks = CEILDIV(max_num_tokens_padded, block_size)
# init output tensors