Enable group size 64 for Machete (#20290)

Signed-off-by: czhu-cohere <conway.zhu@cohere.com>
This commit is contained in:
czhu-cohere
2025-07-01 18:05:44 -07:00
committed by GitHub
parent e81fbefe8a
commit 3abfe22154
3 changed files with 25 additions and 8 deletions

View File

@@ -14,6 +14,8 @@ import torch
from tests.kernels.utils import opcheck
from vllm import _custom_ops as ops
+ from vllm.model_executor.layers.quantization.utils.machete_utils import (
+ query_machete_supported_group_sizes)
from vllm.model_executor.layers.quantization.utils.quant_utils import (
pack_rows, quantize_weights)
from vllm.platforms import current_platform
@@ -46,8 +48,6 @@ MNK_SHAPES = [
(1024, 8192, 4096),
]
- GROUP_SIZES_TO_TEST: list[Optional[int]] = [128, -1]
@dataclass
class TypeConfig:
@@ -270,7 +270,7 @@ def test_machete_all_schedules(shape, types: TypeConfig):
if types.group_scale_type is None:
group_sizes = [None]
else:
- group_sizes = GROUP_SIZES_TO_TEST
+ group_sizes = query_machete_supported_group_sizes(types.act_type)
for group_size in group_sizes:
if not group_size_valid(shape, group_size):
@@ -299,7 +299,7 @@ def test_machete_heuristic(shape, types: TypeConfig):
if types.group_scale_type is None:
group_sizes = [None]
else:
- group_sizes = GROUP_SIZES_TO_TEST
+ group_sizes = query_machete_supported_group_sizes(types.act_type)
for group_size in group_sizes:
if not group_size_valid(shape, group_size):