[Kernel] Delegate construction of FusedMoEQuantConfig to FusedMoEMethodBase subclasses (#22537)
Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
@@ -81,9 +81,14 @@ def _fp8_linear_may_use_deep_gemm(module: torch.nn.Module) -> bool:
 
 
 def _fused_moe_grouped_gemm_may_use_deep_gemm(module: torch.nn.Module) -> bool:
-    if not (isinstance(module, FusedMoE)
-            and module.moe_config.quant_dtype == torch.float8_e4m3fn
-            and module.moe_config.block_shape == deep_gemm_block_shape()):
+    if not isinstance(module, FusedMoE):
+        return False
+
+    moe_quant_config = module.quant_method.get_fused_moe_quant_config(module)
+
+    if (moe_quant_config is None
+            or moe_quant_config.quant_dtype != torch.float8_e4m3fn
+            or moe_quant_config.block_shape != deep_gemm_block_shape()):
         return False
 
     if not isinstance(module.quant_method.fused_experts,
Reference in New Issue
Block a user