[Quant] [Bugfix] Fix quantization config matching with hf_to_vllm_mapper (#20046)

This commit is contained in:
Kyle Sayers
2025-07-01 06:20:34 -04:00
committed by GitHub
parent c05596f1a3
commit 9025a9a705
17 changed files with 107 additions and 29 deletions

View File

@@ -55,6 +55,7 @@ class TorchAOConfig(QuantizationConfig):
os.environ["VLLM_DISABLE_COMPILE_CACHE"] = "1"
logger.info("Using TorchAO: Setting VLLM_DISABLE_COMPILE_CACHE=1")
"""
super().__init__()
self.torchao_config = torchao_config
self.skip_modules = skip_modules or []