Fix: Re-Enable EP for trtllm MoE FP8 backend (#36494)

Signed-off-by: Amir Klein <203507526+amirkl94@users.noreply.github.com>
This commit is contained in:
amirkl94
2026-03-10 08:11:27 +02:00
committed by GitHub
parent d0cd736caa
commit 156e33553c

View File

@@ -35,12 +35,6 @@ class TrtLlmFp8Experts(mk.FusedMoEExpertsMonolithic):
):
super().__init__(moe_config, quant_config)
if moe_config.moe_parallel_config.use_ep and quant_config.is_per_tensor:
raise NotImplementedError(
"EP parallelism is not supported with TRTLLM"
"per-tensor FP8 quantization."
)
self.routing_method_type = moe_config.routing_method
self.topk = moe_config.experts_per_token
self.intermediate_size_per_partition = (