[Feature]: Improve GGUF loading from HuggingFace user experience like repo_id:quant_type (#29137)

Signed-off-by: Injae Ryou <injaeryou@gmail.com>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Injae Ryou
2025-11-25 23:28:53 +09:00
committed by GitHub
parent 0231ce836a
commit 794029f012
10 changed files with 579 additions and 36 deletions

View File

@@ -42,7 +42,10 @@ from vllm.logger import init_logger
from vllm.transformers_utils.config_parser_base import ConfigParserBase
from vllm.transformers_utils.utils import (
check_gguf_file,
is_gguf,
is_remote_gguf,
parse_safetensors_file_metadata,
split_remote_gguf,
)
if envs.VLLM_USE_MODELSCOPE:
@@ -629,10 +632,12 @@ def maybe_override_with_speculators(
Returns:
Tuple of (resolved_model, resolved_tokenizer, speculative_config)
"""
is_gguf = check_gguf_file(model)
if is_gguf:
if check_gguf_file(model):
kwargs["gguf_file"] = Path(model).name
gguf_model_repo = Path(model).parent
elif is_remote_gguf(model):
repo_id, _ = split_remote_gguf(model)
gguf_model_repo = Path(repo_id)
else:
gguf_model_repo = None
kwargs["local_files_only"] = huggingface_hub.constants.HF_HUB_OFFLINE
@@ -678,10 +683,18 @@ def get_config(
) -> PretrainedConfig:
# Separate model folder from file path for GGUF models
is_gguf = check_gguf_file(model)
if is_gguf:
kwargs["gguf_file"] = Path(model).name
model = Path(model).parent
_is_gguf = is_gguf(model)
_is_remote_gguf = is_remote_gguf(model)
if _is_gguf:
if check_gguf_file(model):
# Local GGUF file
kwargs["gguf_file"] = Path(model).name
model = Path(model).parent
elif _is_remote_gguf:
# Remote GGUF - extract repo_id from repo_id:quant_type format
# The actual GGUF file will be downloaded later by GGUFModelLoader
# Keep model as repo_id:quant_type for download, but use repo_id for config
model, _ = split_remote_gguf(model)
if config_format == "auto":
try:
@@ -689,10 +702,25 @@ def get_config(
# Transformers implementation.
if file_or_path_exists(model, MISTRAL_CONFIG_NAME, revision=revision):
config_format = "mistral"
elif is_gguf or file_or_path_exists(
elif (_is_gguf and not _is_remote_gguf) or file_or_path_exists(
model, HF_CONFIG_NAME, revision=revision
):
config_format = "hf"
# Remote GGUF models must have config.json in repo,
# otherwise the config can't be parsed correctly.
# FIXME(Isotr0py): Support remote GGUF repos without config.json
elif _is_remote_gguf and not file_or_path_exists(
model, HF_CONFIG_NAME, revision=revision
):
err_msg = (
"Could not find config.json for remote GGUF model repo. "
"To load remote GGUF model through `<repo_id>:<quant_type>`, "
"ensure your model has config.json (HF format) file. "
"Otherwise please specify --hf-config-path <original_repo> "
"in engine args to fetch config from unquantized hf model."
)
logger.error(err_msg)
raise ValueError(err_msg)
else:
raise ValueError(
"Could not detect config format for no config file found. "
@@ -713,9 +741,6 @@ def get_config(
"'config.json'.\n"
" - For Mistral models: ensure the presence of a "
"'params.json'.\n"
"3. For GGUF: pass the local path of the GGUF checkpoint.\n"
" Loading GGUF from a remote repo directly is not yet "
"supported.\n"
).format(model=model)
raise ValueError(error_message) from e
@@ -729,7 +754,7 @@ def get_config(
**kwargs,
)
# Special architecture mapping check for GGUF models
if is_gguf:
if _is_gguf:
if config.model_type not in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
raise RuntimeError(f"Can't get gguf config for {config.model_type}.")
model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
@@ -889,6 +914,8 @@ def get_pooling_config(model: str, revision: str | None = "main") -> dict | None
A dictionary containing the pooling type and whether
normalization is used, or None if no pooling configuration is found.
"""
if is_remote_gguf(model):
model, _ = split_remote_gguf(model)
modules_file_name = "modules.json"
@@ -1108,6 +1135,8 @@ def get_hf_image_processor_config(
# Separate model folder from file path for GGUF models
if check_gguf_file(model):
model = Path(model).parent
elif is_remote_gguf(model):
model, _ = split_remote_gguf(model)
return get_image_processor_config(
model, token=hf_token, revision=revision, **kwargs
)