[Kernel] Add MXFP8 to Marlin GEMM/MoE and refactor Mxfp8LinearOp (#34664)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2026-04-01 18:41:42 +02:00
committed by GitHub
parent dc0428ebb8
commit db5d0719e1
15 changed files with 481 additions and 129 deletions

View File

@@ -151,6 +151,12 @@ MOE_MARLIN_QUANT_TEST_CONFIGS = [
"b_type": scalar_types.float4_e2m1f,
"group_blocks": [2],
},
# MXFP8
{
"a_type": [scalar_types.bfloat16],
"b_type": scalar_types.float8_e4m3fn,
"group_blocks": [2],
},
# AWQ-INT4 with INT8 activation
{
"a_type": [scalar_types.int8],