[Model] Add AWQ quantization support for InternVL2 model (#7187)

This commit is contained in:
Isotr0py
2024-08-21 14:18:57 +08:00
committed by GitHub
parent b74a125800
commit 12e1c65bc9
4 changed files with 123 additions and 25 deletions

View File

@@ -570,7 +570,8 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
# for the packing.
if isinstance(param, PackedvLLMParameter
) and param.packed_dim == param.output_dim:
-param.adjust_shard_indexes_for_packing(
+shard_size, shard_offset = \
+param.adjust_shard_indexes_for_packing(
shard_size=shard_size, shard_offset=shard_offset)
loaded_weight_shard = loaded_weight.narrow(param.output_dim,
@@ -719,7 +720,8 @@ class QKVParallelLinear(ColumnParallelLinear):
# for the packing.
if isinstance(param, PackedvLLMParameter
) and param.packed_dim == param.output_dim:
-param.adjust_shard_indexes_for_packing(
+shard_size, shard_offset = \
+param.adjust_shard_indexes_for_packing(
shard_size=shard_size, shard_offset=shard_offset)
loaded_weight_shard = loaded_weight.narrow(param.output_dim,