[Kernel] Add MXFP8 to Marlin GEMM/MoE and refactor Mxfp8LinearOp (#34664)

Signed-off-by: mgoin <mgoin64@gmail.com>
2026-04-01 18:41:42 +02:00
parent dc0428ebb8
commit db5d0719e1
15 changed files with 481 additions and 129 deletions
--- a/tests/kernels/moe/test_moe.py
+++ b/tests/kernels/moe/test_moe.py
@@ -151,6 +151,12 @@ MOE_MARLIN_QUANT_TEST_CONFIGS = [
        "b_type": scalar_types.float4_e2m1f,
        "group_blocks": [2],
    },
+    # MXFP8
+    {
+        "a_type": [scalar_types.bfloat16],
+        "b_type": scalar_types.float8_e4m3fn,
+        "group_blocks": [2],
+    },
    # AWQ-INT4 with INT8 activation
    {
        "a_type": [scalar_types.int8],