[Kernel] (2/N) Machete - Integrate into CompressedTensorsWNA16 and GPTQMarlin (#7701)

Co-authored-by: mgoin <michael@neuralmagic.com>
Co-authored-by: Divakar Verma <137818590+divakar-amd@users.noreply.github.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
2024-09-23 13:46:26 -04:00
parent ee5f34b1c2
commit 86e9c8df29
27 changed files with 1005 additions and 246 deletions
--- a/tests/kernels/test_machete_gemm.py
+++ b/tests/kernels/test_machete_gemm.py
@@ -31,6 +31,8 @@ MNK_SHAPES = [
    (257, 4224, 4160),
    (257, 4096, 4096),
    (64, 4096, 4096),
+    (1024, 4096, 8192),
+    (1024, 8192, 4096),
 ]

 ACT_TYPES = [torch.float16, torch.bfloat16]
@@ -139,6 +141,7 @@ def test_machete_all_schedules(shape, atype: torch.dtype,
    output_ref = torch.matmul(a, w_ref)

    for schedule in ops.machete_supported_schedules(wtype):
+        print(f"Testing schedule {schedule}")
        output = ops.machete_gemm(
            a,
            b_q=w_q_machete,