[Bugfix][Quantization] Fix FP8 + EP (#13784)

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
Tyler Michael Smith
2025-02-24 21:54:17 -05:00
committed by GitHub
parent 51010a1807
commit 1e15aaef56
6 changed files with 22 additions and 22 deletions

View File

@@ -136,7 +136,7 @@ class AWQMarlinConfig(QuantizationConfig):
self.full_config).get_quant_method(layer, prefix)
return AWQMarlinLinearMethod(self)
elif isinstance(layer, FusedMoE):
-            if layer.num_experts > 32:
+            if layer.local_num_experts > 32:
# For MoEs with many experts the moe_wna16 kernel is faster
return MoeWNA16Config.from_config(
self.full_config).get_quant_method(layer, prefix)