[NVIDIA] Auto detect modelopt quant and fix DSR1-FP4 weight loading (#22073)

This commit is contained in:
Po-Han Huang (NVIDIA)
2025-08-05 09:02:55 +08:00
committed by GitHub
parent c09efff976
commit bdcb42e45d
3 changed files with 67 additions and 15 deletions

View File

@@ -449,6 +449,20 @@ def get_config(
model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
config.update({"architectures": [model_type]})
# ModelOpt 0.31.0 and later saves the quantization config in the model
# config file, under the "quantization_config" key.
quantization_config = config_dict.get("quantization_config", None)
# ModelOpt 0.29.0 and earlier saves the quantization config in a separate
# "hf_quant_config.json" file next to the model config file.
if quantization_config is None \
and file_or_path_exists(model, "hf_quant_config.json", revision):
quantization_config = get_hf_file_to_dict("hf_quant_config.json",
model, revision)
if quantization_config is not None:
config.quantization_config = quantization_config
if hf_overrides_kw:
logger.debug("Overriding HF config with %s", hf_overrides_kw)
config.update(hf_overrides_kw)