[Kernel] DeepEP dispatch-combine kernel integration (#18434)

Signed-off-by: Varun <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
This commit is contained in:
Varun Sundar Rabindranath
2025-06-03 15:30:02 -04:00
committed by GitHub
parent 01eee40536
commit fa98d77773
23 changed files with 1950 additions and 122 deletions

View File

@@ -1164,7 +1164,7 @@ def fused_experts(hidden_states: torch.Tensor,
# permute/unpermute ops are available.
N = w1.shape[1]
if (allow_deep_gemm and use_fp8_w8a8 and N > 512
- and _valid_deep_gemm(hidden_states, w1, w2, expert_map)):
+ and _valid_deep_gemm(hidden_states, w1, w2)):
assert apply_router_weight_on_input is False
return deep_gemm_moe_fp8(
hidden_states=hidden_states,