From 066c6da6a04906a89739fb7e6874ceb6cf714364 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith <tyler@neuralmagic.com>
Date: Tue, 10 Feb 2026 22:15:43 -0500
Subject: [PATCH] [WideEP] Fix nvfp4 DeepEP High Throughput All2All backend
 (#33738)

Signed-off-by: Tyler Michael Smith <tlrmchlsmth@gmail.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
---
 .../layers/quantization/utils/flashinfer_fp4_moe.py       | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
index 4783ca5e0..cbdcd348c 100644
--- a/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
+++ b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
@@ -82,8 +82,12 @@ def _supports_routing_method(
 
 
 def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
-    """Supports EP."""
-    return True
+    """
+    Returns False for DeepEP HT, which TRTLLM cannot use: TRTLLM is a monolithic
+    kernel that requires dispatch_router_logits() for the naive dispatch/combine
+    path, whereas DeepEP HT only implements dispatch() for the modular kernel path.
+    """
+    return not moe_parallel_config.use_deepep_ht_kernels
 
 
 def is_supported_config_trtllm(