From daa2784bb9333067f3822fb05fc6e5ce459932b4 Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Tue, 3 Feb 2026 05:37:15 -0500
Subject: [PATCH] [Bugfix] Disable
 RoutingMethodType.[Renormalize,RenormalizeNaive] TRTLLM per-tensor FP8 MoE
 (#33620)

Signed-off-by: mgoin <mgoin64@gmail.com>
(cherry picked from commit e346e2d056a66bb84287e4fea049bde9a37bd72b)

Signed-off-by: Robert Shaw <rshaw@neuralmagic.com>
---
 .../layers/fused_moe/flashinfer_trtllm_moe.py         | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
index 04336e9d3..0d7473aaf 100644
--- a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
+++ b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
@@ -69,9 +69,14 @@ def _supports_routing_method(
             RoutingMethodType.RenormalizeNaive,
         ]
     elif (weight_key, activation_key) == (kFp8StaticTensorSym, kFp8StaticTensorSym):
-        # NOTE(rob): kernel requires Llama4.
-        return routing_method == RoutingMethodType.Llama4
-
+        # NOTE(dbari): as above, potentially allow others here.
+        return routing_method in [
+            RoutingMethodType.Llama4,
+            # NOTE(mgoin): Disabled to investigate accuracy issues.
+            # See https://github.com/vllm-project/vllm/issues/33532
+            # RoutingMethodType.Renormalize,
+            # RoutingMethodType.RenormalizeNaive,
+        ]
     else:
         raise ValueError("Unsupported quantization scheme.")