From 156e33553ccdba940fec83a720290b30d2686ee8 Mon Sep 17 00:00:00 2001
From: amirkl94 <203507526+amirkl94@users.noreply.github.com>
Date: Tue, 10 Mar 2026 08:11:27 +0200
Subject: [PATCH] Fix: Re-Enable EP for trtllm MoE FP8 backend (#36494)

Signed-off-by: Amir Klein <203507526+amirkl94@users.noreply.github.com>
---
 .../layers/fused_moe/experts/trtllm_fp8_moe.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py b/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
index 183324420..64b772505 100644
--- a/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
@@ -35,12 +35,6 @@ class TrtLlmFp8Experts(mk.FusedMoEExpertsMonolithic):
     ):
         super().__init__(moe_config, quant_config)
 
-        if moe_config.moe_parallel_config.use_ep and quant_config.is_per_tensor:
-            raise NotImplementedError(
-                "EP parallelism is not supported with TRTLLM"
-                "per-tensor FP8 quantization."
-            )
-
         self.routing_method_type = moe_config.routing_method
         self.topk = moe_config.experts_per_token
         self.intermediate_size_per_partition = (