[Quantization] Automatically infer AWQ modules_to_not_convert field (#26909)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Isotr0py
2025-10-21 09:49:28 +08:00
committed by GitHub
parent bfe0b4bd2a
commit 352c0c8a28
6 changed files with 96 additions and 45 deletions

View File

@@ -285,7 +285,18 @@ def is_layer_skipped(
prefix: str,
ignored_layers: list[str],
fused_mapping: Mapping[str, list[str]] = MappingProxyType({}),
*,
skip_with_substr: bool = False,
) -> bool:
# Exact-match strategy: a layer is treated as ignored only when `prefix`
# is itself one of the entries of `ignored_layers`.
def prefix_full_match(prefix: str, ignored_layers: list[str]) -> bool:
return prefix in ignored_layers
# Substring-match strategy: a layer is treated as ignored when any entry
# of `ignored_layers` occurs anywhere inside `prefix`.
# For cases like: ignored_layers = ["self_attn"], which matches a prefix
# such as "model.layers.0.self_attn.q_proj".
# NOTE: this is plain string containment, so an entry does not have to
# line up with the dot-separated components of `prefix`.
def substr_match(prefix: str, ignored_layers: list[str]) -> bool:
return any(layer in prefix for layer in ignored_layers)
match_func = substr_match if skip_with_substr else prefix_full_match
# prefix: model.layers.0.self_attn.q_proj
# proj_name: q_proj
proj_name = prefix.split(".")[-1]
@@ -302,7 +313,7 @@ def is_layer_skipped(
is_skipped = None
for shard_prefix in shard_prefixes:
is_shard_skipped = shard_prefix in ignored_layers
is_shard_skipped = match_func(shard_prefix, ignored_layers)
if is_skipped is None:
is_skipped = is_shard_skipped
@@ -312,16 +323,16 @@ def is_layer_skipped(
"are quantized. All shards of fused layers "
"to have the same precision."
)
elif "experts" in prefix:
elif "experts" in prefix and not skip_with_substr:
expert_ignore_layers = filter(
lambda layer_name: "experts" in layer_name, ignored_layers
)
return any(
[
prefix in layer_name
for layer_name in ignored_layers
if "experts" in layer_name
]
prefix in layer_name if not skip_with_substr else layer_name in prefix
for layer_name in expert_ignore_layers
)
else:
is_skipped = prefix in ignored_layers
is_skipped = match_func(prefix, ignored_layers)
assert is_skipped is not None
return is_skipped