[Kernels] Overlap shared experts with send/recv (#23273)

Signed-off-by: Bill Nell <bnell@redhat.com>
2025-09-03 12:35:18 -04:00
parent fa4311d85f
commit e9b92dcd89
32 changed files with 885 additions and 227 deletions
--- a/vllm/model_executor/models/glm4_moe.py
+++ b/vllm/model_executor/models/glm4_moe.py
@@ -184,6 +184,8 @@ class Glm4MoE(nn.Module):

        if self.n_shared_experts is not None:
            shared_output = self.shared_experts(hidden_states)
+        else:
+            shared_output = None
        router_logits = self.gate(hidden_states.to(dtype=torch.float32))
        final_hidden_states = self.experts(
            hidden_states=hidden_states,