From 6ff16b77f8c0d15735829e5e85b1c50972536c59 Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Thu, 29 Jan 2026 15:15:17 -0500
Subject: [PATCH] [Bugfix] Enable Triton MoE for FP8 per-tensor dynamic
 (#33300)

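Extend the (weight, activation) quantization pairs accepted by the
Triton MoE expert implementations:

- TritonExperts (fused_moe.py): add
  (kFp8StaticTensorSym, kFp8DynamicTensorSym) to SUPPORTED_W_A and
  import kFp8DynamicTensorSym.
- BatchedTritonExperts (fused_batched_moe.py): add
  (kFp8StaticTensorSym, kFp8DynamicTokenSym) to SUPPORTED_W_A_FP8.

Signed-off-by: mgoin <mgoin64@gmail.com>
(cherry picked from commit bfb9bdaf3f44fdcd4c6ff66b5e8c84834b092534)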
---
 vllm/model_executor/layers/fused_moe/fused_batched_moe.py | 1 +
 vllm/model_executor/layers/fused_moe/fused_moe.py         | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
index 509bacfbc..fd6c365fe 100644
--- a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py
@@ -927,6 +927,7 @@ class BatchedTritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
         SUPPORTED_W_A_FP8 = [
             (kFp8Static128BlockSym, kFp8Dynamic128Sym),
             (kFp8StaticChannelSym, kFp8DynamicTokenSym),
+            (kFp8StaticTensorSym, kFp8DynamicTokenSym),
             (kFp8StaticTensorSym, kFp8StaticTensorSym),
             (kFp8StaticTensorSym, kFp8DynamicTensorSym),
         ]
diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index 669a6e74b..0335339b7 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -45,6 +45,7 @@ from vllm.model_executor.layers.quantization.utils.ocp_mx_utils import OCP_MX_Sc
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     QuantKey,
     kFp8Dynamic128Sym,
+    kFp8DynamicTensorSym,
     kFp8DynamicTokenSym,
     kFp8Static128BlockSym,
     kFp8StaticChannelSym,
@@ -1942,6 +1943,7 @@ class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
             (kFp8StaticChannelSym, kFp8DynamicTokenSym),
             (kFp8StaticTensorSym, kFp8DynamicTokenSym),
             (kFp8StaticTensorSym, kFp8StaticTensorSym),
+            (kFp8StaticTensorSym, kFp8DynamicTensorSym),
         ]
         return (weight_key, activation_key) in SUPPORTED_W_A