use 'max_active_experts' for moe lora input size (#33197)
Signed-off-by: gnovack <gnovack@amazon.com>
@@ -47,6 +47,8 @@ def test_moe_lora_align_block_size(
     # compute paddings
     max_num_tokens_padded = topk_ids.numel() + num_experts * (block_size - 1)
     max_num_tokens_padded = round_up(max_num_tokens_padded, block_size)
+    if topk_ids.numel() < num_experts:
+        max_num_tokens_padded = topk_ids.numel() * block_size
     max_num_m_blocks = CEILDIV(max_num_tokens_padded, block_size)

     # init output tensors
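For context, a minimal runnable sketch of the padding arithmetic this hunk exercises. The round_up and CEILDIV definitions below are stand-ins for helpers assumed to exist elsewhere in the test module, and the tensor shapes are illustrative, not taken from the test:

import torch

def round_up(x: int, multiple: int) -> int:
    # Round x up to the nearest multiple of `multiple`.
    return ((x + multiple - 1) // multiple) * multiple

def CEILDIV(a: int, b: int) -> int:
    # Ceiling division for non-negative integers.
    return -(-a // b)

num_experts = 64
block_size = 16
# topk_ids holds the expert index chosen for each (token, top-k slot) pair;
# here: 8 tokens routed to their top-2 experts.
topk_ids = torch.randint(0, num_experts, (8, 2))

# Worst case: every expert's token group needs up to (block_size - 1) padding
# entries, then the total is rounded up to a whole number of blocks.
max_num_tokens_padded = topk_ids.numel() + num_experts * (block_size - 1)
max_num_tokens_padded = round_up(max_num_tokens_padded, block_size)

# With fewer (token, slot) pairs than experts, at most topk_ids.numel()
# experts can be active, so one padded block per occupied slot is a
# tighter upper bound -- the bound this commit switches the test to.
if topk_ids.numel() < num_experts:
    max_num_tokens_padded = topk_ids.numel() * block_size

max_num_m_blocks = CEILDIV(max_num_tokens_padded, block_size)
print(max_num_tokens_padded, max_num_m_blocks)

The guard matters because the original bound scales with num_experts even when only a handful of experts can receive tokens, which over-allocates the output tensors sized from max_num_tokens_padded.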