[Bugfix] Fix missing is_layer_skipped check for FusedMoE in AWQConfig (#32935)
Signed-off-by: jon <joninco@bullpoint.org>
This commit is contained in:
@@ -106,7 +106,7 @@ class AWQConfig(QuantizationConfig):
|
||||
return AWQLinearMethod(self)
|
||||
elif isinstance(layer, FusedMoE):
|
||||
# Lazy import to avoid circular import.
|
||||
-from .awq_marlin import AWQMarlinConfig, AWQMarlinMoEMethod
|
||||
+from .awq_marlin import AWQMarlinConfig
|
||||
from .moe_wna16 import MoeWNA16Config
|
||||
from .utils.marlin_utils import check_moe_marlin_supports_layer
|
||||
|
||||
@@ -121,6 +121,7 @@ class AWQConfig(QuantizationConfig):
|
||||
"group_size": self.group_size,
|
||||
"zero_point": self.zero_point,
|
||||
"lm_head": False,
|
||||
+"modules_to_not_convert": self.modules_to_not_convert,
|
||||
}
|
||||
return MoeWNA16Config.from_config(config).get_quant_method(
|
||||
layer, prefix
|
||||
@@ -136,7 +137,7 @@ class AWQConfig(QuantizationConfig):
|
||||
awq_marlin_config = AWQMarlinConfig.from_config(
|
||||
marlin_compatible_config_dict
|
||||
)
|
||||
-return AWQMarlinMoEMethod(awq_marlin_config, layer.moe_config)
|
||||
+return awq_marlin_config.get_quant_method(layer, prefix)
|
||||
return None
|
||||
|
||||
def apply_vllm_mapper(self, hf_to_vllm_mapper: "WeightsMapper"):
|
||||
|
||||
Reference in New Issue
Block a user