From a5a623d9616337ed53e8fa0e1376e2d17b505e38 Mon Sep 17 00:00:00 2001
From: yzong-rh
Date: Fri, 3 Apr 2026 13:48:17 -0400
Subject: [PATCH] [Bugfix] Re-enable Renormalize routing for TRT-LLM MoE experts (#38859)

Signed-off-by: Yifan Zong
---
 .../layers/fused_moe/experts/trtllm_bf16_moe.py |  7 ++-----
 .../layers/fused_moe/experts/trtllm_fp8_moe.py  | 12 +++++-------
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/experts/trtllm_bf16_moe.py b/vllm/model_executor/layers/fused_moe/experts/trtllm_bf16_moe.py
index c4d2b96ec..ee6df1af1 100644
--- a/vllm/model_executor/layers/fused_moe/experts/trtllm_bf16_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/trtllm_bf16_moe.py
@@ -79,11 +79,8 @@ class TrtLlmBf16Experts(mk.FusedMoEExpertsMonolithic):
         RoutingMethodType.Default,
         RoutingMethodType.DeepSeekV3,
         RoutingMethodType.Llama4,
-        # NOTE: TRTLLM Kernel has issue with Qwen3.5 router.
-        # Re-enable once the issue is resolved.
-        # https://github.com/vllm-project/vllm/issues/37591
-        # RoutingMethodType.Renormalize,
-        # RoutingMethodType.RenormalizeNaive
+        RoutingMethodType.Renormalize,
+        RoutingMethodType.RenormalizeNaive,
     ]
 
     @staticmethod
diff --git a/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py b/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
index aa78e10b6..cf46eefa3 100644
--- a/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
@@ -277,13 +277,7 @@ class TrtLlmFp8ExpertsMonolithic(TrtLlmFp8ExpertsBase, mk.FusedMoEExpertsMonolit
         weight_key: QuantKey | None,
         activation_key: QuantKey | None,
     ) -> bool:
-        """Monolithic kernels need to express router support.
-        Renormalize/RenormalizeNaive are excluded: the monolithic kernel's
-        internal routing for these methods produces output uncorrelated
-        with the modular kernel's output and with Triton kernel's output
-        for Qwen3.5-35B-A3B-FP8.
-        See: https://github.com/vllm-project/vllm/issues/37591
-        """
+        """Monolithic kernels need to express router support."""
 
         # NOTE(dbari): TopK routing could also be enabled, but need to validate models
         # NOTE(dbari): Default is not implemented and should not be enabled until it is
@@ -295,6 +289,8 @@ class TrtLlmFp8ExpertsMonolithic(TrtLlmFp8ExpertsBase, mk.FusedMoEExpertsMonolit
             return routing_method in [
                 RoutingMethodType.DeepSeekV3,
                 RoutingMethodType.Simulated,
+                RoutingMethodType.Renormalize,
+                RoutingMethodType.RenormalizeNaive,
             ]
         elif (weight_key, activation_key) == (kFp8StaticTensorSym, kFp8StaticTensorSym):
             # NOTE(dbari): as above, potentially allow others here.
@@ -302,6 +298,8 @@ class TrtLlmFp8ExpertsMonolithic(TrtLlmFp8ExpertsBase, mk.FusedMoEExpertsMonolit
                 RoutingMethodType.DeepSeekV3,
                 RoutingMethodType.Llama4,
                 RoutingMethodType.Simulated,
+                RoutingMethodType.Renormalize,
+                RoutingMethodType.RenormalizeNaive,
             ]
         else:
             raise ValueError("Unsupported quantization scheme.")