[fix]: disable CUTLASS block-scaled grouped GEMM for expert parallelism (EP) (#20781)
Signed-off-by: Duncan Moss <djm.moss@gmail.com>
This commit is contained in:
@@ -1192,8 +1192,9 @@ def fused_experts(
|
||||
apply_router_weight_on_input=apply_router_weight_on_input,
|
||||
)
|
||||
elif (allow_cutlass_block_scaled_grouped_gemm and use_fp8_w8a8
|
||||
and _valid_cutlass_block_scaled_grouped_gemm(w1, w2)):
|
||||
assert apply_router_weight_on_input is False
|
||||
and _valid_cutlass_block_scaled_grouped_gemm(
|
||||
w1, w2, inplace, activation, apply_router_weight_on_input,
|
||||
expert_map)):
|
||||
return run_cutlass_block_scaled_fused_experts(
|
||||
a=hidden_states,
|
||||
w1=w1,
|
||||
|
||||
Reference in New Issue
Block a user