diff --git a/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py b/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py index 183324420..64b772505 100644 --- a/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py +++ b/vllm/model_executor/layers/fused_moe/experts/trtllm_fp8_moe.py @@ -35,12 +35,6 @@ class TrtLlmFp8Experts(mk.FusedMoEExpertsMonolithic): ): super().__init__(moe_config, quant_config) - if moe_config.moe_parallel_config.use_ep and quant_config.is_per_tensor: - raise NotImplementedError( - "EP parallelism is not supported with TRTLLM" - "per-tensor FP8 quantization." - ) - self.routing_method_type = moe_config.routing_method self.topk = moe_config.experts_per_token self.intermediate_size_per_partition = (