[MoE Refactor] Oracle Select FP8+NVFP4 Kernels In Priority (#32414)

This commit is contained in:
Robert Shaw
2026-01-21 08:22:33 -05:00
committed by GitHub
parent e14467be43
commit 42135d6898
82 changed files with 2710 additions and 1563 deletions

View File

@@ -853,6 +853,7 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
max_num_tokens=max_num_tokens_per_rank,
num_dispatchers=prepare_finalize.num_dispatchers(),
quant_config=self.moe_quant_config,
+moe_config=self.moe,
)
else:
raise NotImplementedError(
@@ -875,11 +876,11 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
}
return TrtLlmGenExperts(self.moe, self.moe_quant_config, **kwargs)
elif self.mxfp4_backend == Mxfp4Backend.MARLIN:
-return MarlinExperts(self.moe_quant_config)
+return MarlinExperts(self.moe, self.moe_quant_config)
elif self.mxfp4_backend == Mxfp4Backend.TRITON:
if self.moe.is_lora_enabled:
-return UnfusedOAITritonExperts(self.moe_quant_config)
-return OAITritonExperts(self.moe_quant_config)
+return UnfusedOAITritonExperts(self.moe, self.moe_quant_config)
+return OAITritonExperts(self.moe, self.moe_quant_config)
else:
raise NotImplementedError(
f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for EP"