Enable group size 64 for Machete (#20290)

Signed-off-by: czhu-cohere <conway.zhu@cohere.com>
2025-07-01 18:05:44 -07:00
parent e81fbefe8a
commit 3abfe22154
3 changed files with 25 additions and 8 deletions
--- a/vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py
+++ b/vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py
@@ -8,7 +8,7 @@ import torch

 from vllm import _custom_ops as ops
 from vllm.model_executor.layers.quantization.utils.machete_utils import (
-    MACHETE_SUPPORTED_GROUP_SIZES, check_machete_supports_shape,
+    check_machete_supports_shape, query_machete_supported_group_sizes,
    query_machete_supported_quant_types)
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
    pack_quantized_values_into_int32, unpack_quantized_values_into_int32)
@@ -40,10 +40,10 @@ class MacheteLinearKernel(MPLinearKernel):
                           "Machete, supported types are: "\
                           f"{query_machete_supported_quant_types(c.zero_points)}"

-        if c.group_size not in MACHETE_SUPPORTED_GROUP_SIZES:
+        if c.group_size not in query_machete_supported_group_sizes(c.act_type):
            return False, f"Group size ({c.group_size}) not supported by "\
                            "Machete, supported group sizes are: "\
-                            f"{MACHETE_SUPPORTED_GROUP_SIZES}"
+                            f"{query_machete_supported_group_sizes(c.act_type)}"

        return check_machete_supports_shape(c.partition_weight_shape[0],
                                            c.partition_weight_shape[1])