[Kernel] (2/N) Machete - Integrate into CompressedTensorsWNA16 and GPTQMarlin (#7701)
Co-authored-by: mgoin <michael@neuralmagic.com> Co-authored-by: Divakar Verma <137818590+divakar-amd@users.noreply.github.com> Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
@@ -31,6 +31,8 @@ MNK_SHAPES = [
|
||||
(257, 4224, 4160),
|
||||
(257, 4096, 4096),
|
||||
(64, 4096, 4096),
|
||||
(1024, 4096, 8192),
|
||||
(1024, 8192, 4096),
|
||||
]
|
||||
|
||||
ACT_TYPES = [torch.float16, torch.bfloat16]
|
||||
@@ -139,6 +141,7 @@ def test_machete_all_schedules(shape, atype: torch.dtype,
|
||||
output_ref = torch.matmul(a, w_ref)
|
||||
|
||||
for schedule in ops.machete_supported_schedules(wtype):
|
||||
print(f"Testing schedule {schedule}")
|
||||
output = ops.machete_gemm(
|
||||
a,
|
||||
b_q=w_q_machete,
|
||||
|
||||
15
tests/kernels/test_permute_cols.py
Normal file
15
tests/kernels/test_permute_cols.py
Normal file
@@ -0,0 +1,15 @@
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from tests.kernels.utils import opcheck
|
||||
from vllm._custom_ops import permute_cols
|
||||
|
||||
|
||||
@pytest.mark.parametrize('shape', [(1, 512), (544, 4096), (67, 8192)])
@pytest.mark.parametrize('dtype', [torch.bfloat16, torch.float16])
def test_permute_cols(shape, dtype):
    """Compare ``permute_cols`` with column indexing on a random input.

    A random matrix of the given ``shape`` and ``dtype`` and a random
    column permutation are built and moved to the GPU. The custom op's
    output must match ``inp[:, col_order]``.
    """
    # Both tensors are created first and only then moved with .cuda().
    inp = torch.randn(shape, dtype=dtype).cuda()
    num_cols = inp.shape[1]
    col_order = torch.randperm(num_cols).to(torch.int).cuda()

    # Run opcheck on the raw torch op with the same arguments that the
    # Python wrapper receives below.
    op_args = (inp, col_order)
    opcheck(torch.ops._C.permute_cols, op_args)

    actual = permute_cols(inp, col_order)
    expected = inp[:, col_order]
    torch.testing.assert_close(actual, expected)
|
||||
Reference in New Issue
Block a user