[fix]: disable cutlass block scaled group gemm for EP (#20781)

Signed-off-by: Duncan Moss <djm.moss@gmail.com>
This commit is contained in:
Duncan Moss
2025-07-10 19:39:18 -07:00
committed by GitHub
parent 0cf893cae1
commit 5923ab9524
3 changed files with 34 additions and 9 deletions

View File

@@ -1192,8 +1192,9 @@ def fused_experts(
apply_router_weight_on_input=apply_router_weight_on_input,
)
elif (allow_cutlass_block_scaled_grouped_gemm and use_fp8_w8a8
- and _valid_cutlass_block_scaled_grouped_gemm(w1, w2)):
- assert apply_router_weight_on_input is False
+ and _valid_cutlass_block_scaled_grouped_gemm(
+ w1, w2, inplace, activation, apply_router_weight_on_input,
+ expert_map)):
return run_cutlass_block_scaled_fused_experts(
a=hidden_states,
w1=w1,