Fix Llama4 FlashInfer FP4 MoE issues (#22511)
Signed-off-by: Po-Han Huang <pohanh@nvidia.com>
This commit is contained in:
committed by
GitHub
parent: f7ad6a1eb3
commit: 67c153b88a
@@ -1299,8 +1299,9 @@ class ModelOptNvFp4FusedMoE(FusedMoEMethodBase):
 output2_scale_scalar=layer.g2_alphas.data,
 num_experts=global_num_experts,
 top_k=top_k,
-n_group=num_expert_group,
-topk_group=topk_group,
+n_group=num_expert_group
+if num_expert_group is not None else 0,
+topk_group=topk_group if topk_group is not None else 0,
 intermediate_size=layer.intermediate_size_per_partition,
 local_expert_offset=layer.ep_rank * layer.local_num_experts,
 local_num_experts=layer.local_num_experts,
Reference in New Issue
Block a user