Fix: Re-Enable EP for trtllm MoE FP8 backend (#36494)
Signed-off-by: Amir Klein <203507526+amirkl94@users.noreply.github.com>
```diff
@@ -35,12 +35,6 @@ class TrtLlmFp8Experts(mk.FusedMoEExpertsMonolithic):
     ):
         super().__init__(moe_config, quant_config)
 
-        if moe_config.moe_parallel_config.use_ep and quant_config.is_per_tensor:
-            raise NotImplementedError(
-                "EP parallelism is not supported with TRTLLM"
-                "per-tensor FP8 quantization."
-            )
-
         self.routing_method_type = moe_config.routing_method
         self.topk = moe_config.experts_per_token
         self.intermediate_size_per_partition = (
```
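With the guard removed, expert parallelism (EP) is no longer rejected when the TRTLLM MoE backend is used with per-tensor FP8 quantization. Below is a minimal sketch (not part of the commit) of exercising that combination through vLLM's offline `LLM` entry point. The `enable_expert_parallel` engine argument and the `VLLM_USE_FLASHINFER_MOE_FP8` opt-in environment variable are assumptions that should be checked against your vLLM version, and the checkpoint name is hypothetical.

```python
import os

# Assumed opt-in for the FlashInfer/TRTLLM FP8 MoE kernels; set before
# importing vllm so the env var is picked up at engine construction.
os.environ["VLLM_USE_FLASHINFER_MOE_FP8"] = "1"

from vllm import LLM

llm = LLM(
    model="some-org/Mixtral-8x7B-FP8",  # hypothetical per-tensor FP8 MoE checkpoint
    tensor_parallel_size=4,
    enable_expert_parallel=True,  # the combination the removed guard used to reject
)

# Smoke test: a single generation through the EP + FP8 MoE path.
outputs = llm.generate("Hello, world")
print(outputs[0].outputs[0].text)
```

Before this change, constructing the engine this way would have raised `NotImplementedError` from `TrtLlmFp8Experts.__init__`; after it, the request is routed through the TRTLLM per-tensor FP8 experts with experts sharded across ranks.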