[BugFix] Skip the Q component of QKVParallelLinear in the QKVCrossParallelLinear case, where the Q component's width is 0 (#22369)
Signed-off-by: sstamenk <sstamenk@amd.com>
This commit is contained in:
@@ -121,6 +121,9 @@ def requantize_with_max_scale(
|
||||
if unfused_module_in_checkpoint:
|
||||
start = 0
|
||||
for idx, logical_width in enumerate(logical_widths):
|
||||
# Skip any component with zero width.
|
||||
if logical_width == 0:
|
||||
continue
|
||||
end = start + logical_width
|
||||
weight_dq = per_tensor_dequantize(weight[start:end, :],
|
||||
weight_scale[idx])
|
||||
|
||||
Reference in New Issue
Block a user