[Quantization] Quark MXFP4 format loading (#16943)

Author: Bowen Bao
Date: 2025-05-07 12:05:05 -07:00
Committed by: GitHub
Parent: f98e307588
Commit: db593aa67f
9 changed files with 289 additions and 3 deletions


@@ -220,7 +220,7 @@ def get_model_architecture(
     # Special handling for quantized Mixtral.
     # FIXME(woosuk): This is a temporary hack.
     mixtral_supported = [
-        "fp8", "compressed-tensors", "gptq_marlin", "awq_marlin"
+        "fp8", "compressed-tensors", "gptq_marlin", "awq_marlin", "quark"
     ]
     if (model_config.quantization is not None
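
For context, a minimal sketch of how a check like this could consume `mixtral_supported`. The hunk only shows the list and the opening of the conditional; the helper name `resolve_mixtral_architecture` and the fallback class name `QuantMixtralForCausalLM` below are assumptions for illustration, not part of this diff:

```python
from typing import List, Optional

def resolve_mixtral_architecture(
    quantization: Optional[str], architectures: List[str]
) -> List[str]:
    # Hypothetical helper mirroring the hunk above: a quantized Mixtral
    # checkpoint whose quantization method is NOT natively supported is
    # rerouted to a dedicated quantized architecture.
    mixtral_supported = [
        "fp8", "compressed-tensors", "gptq_marlin", "awq_marlin", "quark"
    ]
    if (quantization is not None
            and quantization not in mixtral_supported
            and "MixtralForCausalLM" in architectures):
        # Assumed fallback class name. With "quark" added to the list
        # above, Quark checkpoints skip this rewrite and load through the
        # standard Mixtral path.
        return ["QuantMixtralForCausalLM"]
    return architectures

# Example: a Quark-quantized Mixtral now keeps the standard architecture.
print(resolve_mixtral_architecture("quark", ["MixtralForCausalLM"]))
# -> ['MixtralForCausalLM']
```

The one-line change therefore means Quark-quantized Mixtral checkpoints are treated as natively supported rather than falling through to the temporary hack path.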