[Model] Add AWQ quantization support for InternVL2 model (#7187)
This commit is contained in:
@@ -570,7 +570,8 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
             # for the packing.
             if isinstance(param, PackedvLLMParameter
                           ) and param.packed_dim == param.output_dim:
-                param.adjust_shard_indexes_for_packing(
+                shard_size, shard_offset = \
+                    param.adjust_shard_indexes_for_packing(
                     shard_size=shard_size, shard_offset=shard_offset)
 
             loaded_weight_shard = loaded_weight.narrow(param.output_dim,
@@ -719,7 +720,8 @@ class QKVParallelLinear(ColumnParallelLinear):
             # for the packing.
             if isinstance(param, PackedvLLMParameter
                           ) and param.packed_dim == param.output_dim:
-                param.adjust_shard_indexes_for_packing(
+                shard_size, shard_offset = \
+                    param.adjust_shard_indexes_for_packing(
                     shard_size=shard_size, shard_offset=shard_offset)
 
             loaded_weight_shard = loaded_weight.narrow(param.output_dim,
Reference in New Issue
Block a user