[Misc] DP : Add ExpertTokensMetadata (#20332)

Signed-off-by: Varun <vsundarr@redhat.com> Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com> Co-authored-by: Varun <vsundarr@redhat.com>
2025-07-09 20:33:14 -04:00
parent b7d9e9416f
commit 805d62ca88
12 changed files with 117 additions and 79 deletions
--- a/vllm/model_executor/layers/fused_moe/cutlass_moe.py
+++ b/vllm/model_executor/layers/fused_moe/cutlass_moe.py
@@ -303,11 +303,17 @@ class CutlassExpertsFp8(mk.FusedMoEPermuteExpertsUnpermute):
        a2_scale: Optional[torch.Tensor],
        workspace13: torch.Tensor,
        workspace2: torch.Tensor,
-        expert_num_tokens: Optional[torch.Tensor],
+        expert_tokens_meta: Optional[mk.ExpertTokensMetadata],
    ):
        assert w1_zp is None, "w1_zp is not supported in CUTLASS MoE"
        assert w2_zp is None, "w2_zp is not supported in CUTLASS MoE"
+
+        expert_num_tokens = None
+        if expert_tokens_meta is not None:
+            expert_num_tokens = expert_tokens_meta.expert_num_tokens
+
        activation_callable = lambda o, i: self.activation(activation, o, i)
+
        in_dtype = hidden_states.dtype
        run_cutlass_moe_fp8(
            output, hidden_states, w1, w2, topk_ids, activation_callable,