[Bugfix] Fix missing is_layer_skipped check for FusedMoE in AWQConfig (#32935)
Signed-off-by: jon <joninco@bullpoint.org>
This commit is contained in:
@@ -106,7 +106,7 @@ class AWQConfig(QuantizationConfig):
 return AWQLinearMethod(self)
 elif isinstance(layer, FusedMoE):
 # Lazy import to avoid circular import.
-from .awq_marlin import AWQMarlinConfig, AWQMarlinMoEMethod
+from .awq_marlin import AWQMarlinConfig
 from .moe_wna16 import MoeWNA16Config
 from .utils.marlin_utils import check_moe_marlin_supports_layer
 
@@ -121,6 +121,7 @@ class AWQConfig(QuantizationConfig):
 "group_size": self.group_size,
 "zero_point": self.zero_point,
 "lm_head": False,
+"modules_to_not_convert": self.modules_to_not_convert,
 }
 return MoeWNA16Config.from_config(config).get_quant_method(
 layer, prefix
@@ -136,7 +137,7 @@ class AWQConfig(QuantizationConfig):
 awq_marlin_config = AWQMarlinConfig.from_config(
 marlin_compatible_config_dict
 )
-return AWQMarlinMoEMethod(awq_marlin_config, layer.moe_config)
+return awq_marlin_config.get_quant_method(layer, prefix)
 return None
 
 def apply_vllm_mapper(self, hf_to_vllm_mapper: "WeightsMapper"):
Reference in New Issue
Block a user