From 156e33553ccdba940fec83a720290b30d2686ee8 Mon Sep 17 00:00:00 2001
From: amirkl94 <203507526+amirkl94@users.noreply.github.com>
Date: Tue, 10 Mar 2026 08:11:27 +0200
Subject: [PATCH] Fix: Re-Enable EP for trtllm MoE FP8 backend (#36494)

Signed-off-by: Amir Klein <203507526+amirkl94@users.noreply.github.com>
---
 .../layers/fused_moe/experts/trtllm_fp8_moe.py              | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py b/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
index 183324420..64b772505 100644
--- a/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py
@@ -35,12 +35,6 @@ class TrtLlmFp8Experts(mk.FusedMoEExpertsMonolithic):
     ):
         super().__init__(moe_config, quant_config)
 
-        if moe_config.moe_parallel_config.use_ep and quant_config.is_per_tensor:
-            raise NotImplementedError(
-                "EP parallelism is not supported with TRTLLM"
-                "per-tensor FP8 quantization."
-            )
-
         self.routing_method_type = moe_config.routing_method
         self.topk = moe_config.experts_per_token
         self.intermediate_size_per_partition = (