[Bugfix] Fix ConchLinearKernel channelwise quantization (group_size=-1) (#37329)
Signed-off-by: Matthias Gehre <matthias.gehre@amd.com>
This commit is contained in:
@@ -124,6 +124,14 @@ class ConchLinearKernel(MPLinearKernel):
|
|||||||
|
|
||||||
w_q, w_s, w_zp, _ = self._get_weight_params(layer)
|
w_q, w_s, w_zp, _ = self._get_weight_params(layer)
|
||||||
|
|
||||||
|
# Map channelwise group_size=-1 to the actual input dimension K.
|
||||||
|
# The conch kernel computes stride_mul = block_k / group_size;
|
||||||
|
# passing -1 produces a negative stride that reads out-of-bounds
|
||||||
|
# scale values for all K-blocks after the first.
|
||||||
|
group_size = self.config.group_size
|
||||||
|
if group_size == -1:
|
||||||
|
group_size = x.shape[-1]
|
||||||
|
|
||||||
output = mixed_precision_gemm(
|
output = mixed_precision_gemm(
|
||||||
x=x,
|
x=x,
|
||||||
w_q_packed=w_q.data,
|
w_q_packed=w_q.data,
|
||||||
@@ -131,7 +139,7 @@ class ConchLinearKernel(MPLinearKernel):
|
|||||||
w_zp=w_zp.data if w_zp is not None else None,
|
w_zp=w_zp.data if w_zp is not None else None,
|
||||||
weight_size_bits=self.config.weight_type.size_bits,
|
weight_size_bits=self.config.weight_type.size_bits,
|
||||||
weight_bias=self.config.weight_type.bias,
|
weight_bias=self.config.weight_type.bias,
|
||||||
group_size=self.config.group_size,
|
group_size=group_size,
|
||||||
)
|
)
|
||||||
|
|
||||||
if bias is not None:
|
if bias is not None:
|
||||||
|
|||||||
Reference in New Issue
Block a user