From 066c6da6a04906a89739fb7e6874ceb6cf714364 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith
Date: Tue, 10 Feb 2026 22:15:43 -0500
Subject: [PATCH] [WideEP] Fix nvfp4 DeepEP High Throughput All2All backend (#33738)

Signed-off-by: Tyler Michael Smith
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
---
 .../layers/quantization/utils/flashinfer_fp4_moe.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
index 4783ca5e0..cbdcd348c 100644
--- a/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
+++ b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
@@ -82,8 +82,12 @@ def _supports_routing_method(
 
 
 def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
-    """Supports EP."""
-    return True
+    """
+    TRTLLM is a monolithic kernel that requires dispatch_router_logits() for
+    the naive dispatch/combine path. DeepEP HT only implements dispatch() for
+    the modular kernel path, so TRTLLM is incompatible with DeepEP HT.
+    """
+    return not moe_parallel_config.use_deepep_ht_kernels
 
 
 def is_supported_config_trtllm(