[Log] DeepGEMM Update Log for Unaligned Problem Size (#22208)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Wentao Ye
2025-08-04 22:13:19 -04:00
committed by GitHub
parent 6fa41e0c32
commit d7b28f3415
3 changed files with 23 additions and 8 deletions

View File

@@ -1360,10 +1360,8 @@ def fused_experts(
# E8M0 scale, which means we requantize the weight and input to the specific
# scale. Falling back to cutlass or triton in some cases would cause
# accuracy issues.
N = w1.size(1)
should_use_deep_gemm = ((N > 512
and _valid_deep_gemm(hidden_states, w1, w2))
or is_blackwell_deep_gemm_used())
should_use_deep_gemm = is_blackwell_deep_gemm_used() or _valid_deep_gemm(
hidden_states, w1, w2)
if (allow_deep_gemm and use_fp8_w8a8 and should_use_deep_gemm):
assert apply_router_weight_on_input is False
assert is_act_and_mul, (