Categorize tests/kernels/ based on kernel type (#16799)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
38
tests/kernels/quantization/test_ggml.py
Normal file
38
tests/kernels/quantization/test_ggml.py
Normal file
@@ -0,0 +1,38 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import gguf
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from tests.kernels.utils import opcheck
|
||||
from vllm import _custom_ops as ops # noqa: F401
|
||||
|
||||
|
||||
@pytest.mark.parametrize("quant_type", [12])
def test_ggml_opcheck(quant_type):
    """Schema-check the GGML custom ops via ``opcheck``.

    Exercises ``ggml_dequantize``, ``ggml_mul_mat_a8``,
    ``ggml_mul_mat_vec_a8`` and the fused-MoE ``ggml_moe_a8`` kernel with
    random uint8 quantized weights for quant type 12 (Q4_K in the GGUF
    enum).
    """
    # Elements per quant block and the block's byte size for this type.
    blk_elems, blk_bytes = gguf.GGML_QUANT_SIZES[quant_type]

    qweight = torch.randint(0, 100, [256, 1152], device='cuda',
                            dtype=torch.uint8)
    rows = qweight.shape[0]
    # Logical column count once the packed bytes are dequantized.
    cols = qweight.shape[1] // blk_bytes * blk_elems
    opcheck(torch.ops._C.ggml_dequantize,
            (qweight, quant_type, rows, cols, torch.float16))

    act = torch.rand((rows, 512), device='cuda', dtype=torch.float16)
    opcheck(torch.ops._C.ggml_mul_mat_a8,
            (qweight, act, quant_type, qweight.shape[0]))
    opcheck(torch.ops._C.ggml_mul_mat_vec_a8,
            (qweight, act, quant_type, qweight.shape[0]))

    # MoE variant: 3-D per-expert weights plus routing metadata tensors.
    qweight = torch.randint(0, 100, [256, 1024, 336], device='cuda',
                            dtype=torch.uint8)
    act = torch.rand((1, 1024), device='cuda', dtype=torch.float16)
    sorted_token_ids = torch.arange(776, device='cuda')
    expert_ids = torch.randint(0, 256, (194, ), device='cuda')
    num_tokens_post_padded = torch.tensor([1], dtype=torch.int64,
                                          device='cuda')

    opcheck(torch.ops._C.ggml_moe_a8,
            (act, qweight, sorted_token_ids, expert_ids,
             num_tokens_post_padded, quant_type, qweight.shape[0], 1,
             act.shape[0]))
|
||||
Reference in New Issue
Block a user