From e346e2d056a66bb84287e4fea049bde9a37bd72b Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Tue, 3 Feb 2026 05:37:15 -0500
Subject: [PATCH] [Bugfix] Disable
 RoutingMethodType.[Renormalize,RenormalizeNaive] TRTLLM per-tensor FP8 MoE
 (#33620)

Signed-off-by: mgoin <mgoin64@gmail.com>
---
 .../layers/fused_moe/flashinfer_trtllm_moe.py               | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
index a066535c5..43e02d510 100644
--- a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
+++ b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
@@ -72,8 +72,10 @@ def _supports_routing_method(
         # NOTE(dbari): as above, potentially allow others here.
         return routing_method in [
             RoutingMethodType.Llama4,
-            RoutingMethodType.Renormalize,
-            RoutingMethodType.RenormalizeNaive,
+            # NOTE(mgoin): Disabled for per-tensor FP8 pending accuracy investigation;
+            # see https://github.com/vllm-project/vllm/issues/33532
+            # RoutingMethodType.Renormalize,
+            # RoutingMethodType.RenormalizeNaive,
         ]
     else:
         raise ValueError("Unsupported quantization scheme.")