Fix: Re-Enable EP for trtllm MoE FP8 backend (#36494)
Signed-off-by: Amir Klein <203507526+amirkl94@users.noreply.github.com>
This commit is contained in:
@@ -35,12 +35,6 @@ class TrtLlmFp8Experts(mk.FusedMoEExpertsMonolithic):
|
|||||||
):
|
):
|
||||||
super().__init__(moe_config, quant_config)
|
super().__init__(moe_config, quant_config)
|
||||||
|
|
||||||
if moe_config.moe_parallel_config.use_ep and quant_config.is_per_tensor:
|
|
||||||
raise NotImplementedError(
|
|
||||||
"EP parallelism is not supported with TRTLLM"
|
|
||||||
"per-tensor FP8 quantization."
|
|
||||||
)
|
|
||||||
|
|
||||||
self.routing_method_type = moe_config.routing_method
|
self.routing_method_type = moe_config.routing_method
|
||||||
self.topk = moe_config.experts_per_token
|
self.topk = moe_config.experts_per_token
|
||||||
self.intermediate_size_per_partition = (
|
self.intermediate_size_per_partition = (
|
||||||
|
|||||||
Reference in New Issue
Block a user