[Kernel] DeepEP dispatch-combine kernel integration (#18434)
Signed-off-by: Varun <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
This commit is contained in:
committed by
GitHub
parent
01eee40536
commit
fa98d77773
@@ -1164,7 +1164,7 @@ def fused_experts(hidden_states: torch.Tensor,
|
||||
# permute/unpermute ops are available.
|
||||
N = w1.shape[1]
|
||||
if (allow_deep_gemm and use_fp8_w8a8 and N > 512
|
||||
- and _valid_deep_gemm(hidden_states, w1, w2, expert_map)):
|
||||
+ and _valid_deep_gemm(hidden_states, w1, w2)):
|
||||
assert apply_router_weight_on_input is False
|
||||
return deep_gemm_moe_fp8(
|
||||
hidden_states=hidden_states,
|
||||
|
||||
Reference in New Issue
Block a user