[NVIDIA] Auto detect modelopt quant and fix DSR1-FP4 weight loading (#22073)
This commit is contained in:
committed by
GitHub
parent
c09efff976
commit
bdcb42e45d
@@ -449,6 +449,20 @@ def get_config(
|
||||
model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
|
||||
config.update({"architectures": [model_type]})
|
||||
|
||||
# ModelOpt 0.31.0 and later saves the quantization config in the model
|
||||
# config file.
|
||||
quantization_config = config_dict.get("quantization_config", None)
|
||||
|
||||
# ModelOpt 0.29.0 and earlier saves the quantization config in a separate
|
||||
# "hf_quant_config.json" in the same directory as the model config file.
|
||||
if quantization_config is None \
|
||||
and file_or_path_exists(model, "hf_quant_config.json", revision):
|
||||
quantization_config = get_hf_file_to_dict("hf_quant_config.json",
|
||||
model, revision)
|
||||
|
||||
if quantization_config is not None:
|
||||
config.quantization_config = quantization_config
|
||||
|
||||
if hf_overrides_kw:
|
||||
logger.debug("Overriding HF config with %s", hf_overrides_kw)
|
||||
config.update(hf_overrides_kw)
|
||||
|
||||
Reference in New Issue
Block a user