Support Llama 4 for fused_marlin_moe (#20457)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-07-04 16:55:10 +09:00
committed by GitHub
parent 1caca5a589
commit 0e3fe896e2
6 changed files with 11 additions and 17 deletions

View File

@@ -493,11 +493,6 @@ class AWQMoEMethod(FusedMoEMethodBase):
assert activation == "silu", "Only SiLU activation is supported."
-if apply_router_weight_on_input:
-raise NotImplementedError(
-"Apply router weight on input is not supported for"
-"fused Marlin MoE method.")
topk_weights, topk_ids = FusedMoE.select_experts(
hidden_states=x,
router_logits=router_logits,
@@ -520,6 +515,7 @@ class AWQMoEMethod(FusedMoEMethodBase):
topk_weights,
topk_ids,
quant_type_id=self.quant_type.id,
+apply_router_weight_on_input=apply_router_weight_on_input,
global_num_experts=global_num_experts,
expert_map=expert_map,
w1_zeros=layer.w13_qzeros,