[Feature]: Improve GGUF loading from HuggingFace user experience like repo_id:quant_type (#29137)
Signed-off-by: Injae Ryou <injaeryou@gmail.com> Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
@@ -42,7 +42,10 @@ from vllm.logger import init_logger
|
||||
from vllm.transformers_utils.config_parser_base import ConfigParserBase
|
||||
from vllm.transformers_utils.utils import (
|
||||
check_gguf_file,
|
||||
is_gguf,
|
||||
is_remote_gguf,
|
||||
parse_safetensors_file_metadata,
|
||||
split_remote_gguf,
|
||||
)
|
||||
|
||||
if envs.VLLM_USE_MODELSCOPE:
|
||||
@@ -629,10 +632,12 @@ def maybe_override_with_speculators(
|
||||
Returns:
|
||||
Tuple of (resolved_model, resolved_tokenizer, speculative_config)
|
||||
"""
|
||||
is_gguf = check_gguf_file(model)
|
||||
if is_gguf:
|
||||
if check_gguf_file(model):
|
||||
kwargs["gguf_file"] = Path(model).name
|
||||
gguf_model_repo = Path(model).parent
|
||||
elif is_remote_gguf(model):
|
||||
repo_id, _ = split_remote_gguf(model)
|
||||
gguf_model_repo = Path(repo_id)
|
||||
else:
|
||||
gguf_model_repo = None
|
||||
kwargs["local_files_only"] = huggingface_hub.constants.HF_HUB_OFFLINE
|
||||
@@ -678,10 +683,18 @@ def get_config(
|
||||
) -> PretrainedConfig:
|
||||
# Separate model folder from file path for GGUF models
|
||||
|
||||
is_gguf = check_gguf_file(model)
|
||||
if is_gguf:
|
||||
kwargs["gguf_file"] = Path(model).name
|
||||
model = Path(model).parent
|
||||
_is_gguf = is_gguf(model)
|
||||
_is_remote_gguf = is_remote_gguf(model)
|
||||
if _is_gguf:
|
||||
if check_gguf_file(model):
|
||||
# Local GGUF file
|
||||
kwargs["gguf_file"] = Path(model).name
|
||||
model = Path(model).parent
|
||||
elif _is_remote_gguf:
|
||||
# Remote GGUF - extract repo_id from repo_id:quant_type format
|
||||
# The actual GGUF file will be downloaded later by GGUFModelLoader
|
||||
# Keep model as repo_id:quant_type for download, but use repo_id for config
|
||||
model, _ = split_remote_gguf(model)
|
||||
|
||||
if config_format == "auto":
|
||||
try:
|
||||
@@ -689,10 +702,25 @@ def get_config(
|
||||
# Transformers implementation.
|
||||
if file_or_path_exists(model, MISTRAL_CONFIG_NAME, revision=revision):
|
||||
config_format = "mistral"
|
||||
elif is_gguf or file_or_path_exists(
|
||||
elif (_is_gguf and not _is_remote_gguf) or file_or_path_exists(
|
||||
model, HF_CONFIG_NAME, revision=revision
|
||||
):
|
||||
config_format = "hf"
|
||||
# Remote GGUF models must have config.json in repo,
|
||||
# otherwise the config can't be parsed correctly.
|
||||
# FIXME(Isotr0py): Support remote GGUF repos without config.json
|
||||
elif _is_remote_gguf and not file_or_path_exists(
|
||||
model, HF_CONFIG_NAME, revision=revision
|
||||
):
|
||||
err_msg = (
|
||||
"Could not find config.json for remote GGUF model repo. "
|
||||
"To load remote GGUF model through `<repo_id>:<quant_type>`, "
|
||||
"ensure your model has config.json (HF format) file. "
|
||||
"Otherwise please specify --hf-config-path <original_repo> "
|
||||
"in engine args to fetch config from unquantized hf model."
|
||||
)
|
||||
logger.error(err_msg)
|
||||
raise ValueError(err_msg)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Could not detect config format for no config file found. "
|
||||
@@ -713,9 +741,6 @@ def get_config(
|
||||
"'config.json'.\n"
|
||||
" - For Mistral models: ensure the presence of a "
|
||||
"'params.json'.\n"
|
||||
"3. For GGUF: pass the local path of the GGUF checkpoint.\n"
|
||||
" Loading GGUF from a remote repo directly is not yet "
|
||||
"supported.\n"
|
||||
).format(model=model)
|
||||
|
||||
raise ValueError(error_message) from e
|
||||
@@ -729,7 +754,7 @@ def get_config(
|
||||
**kwargs,
|
||||
)
|
||||
# Special architecture mapping check for GGUF models
|
||||
if is_gguf:
|
||||
if _is_gguf:
|
||||
if config.model_type not in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
|
||||
raise RuntimeError(f"Can't get gguf config for {config.model_type}.")
|
||||
model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
|
||||
@@ -889,6 +914,8 @@ def get_pooling_config(model: str, revision: str | None = "main") -> dict | None
|
||||
A dictionary containing the pooling type and whether
|
||||
normalization is used, or None if no pooling configuration is found.
|
||||
"""
|
||||
if is_remote_gguf(model):
|
||||
model, _ = split_remote_gguf(model)
|
||||
|
||||
modules_file_name = "modules.json"
|
||||
|
||||
@@ -1108,6 +1135,8 @@ def get_hf_image_processor_config(
|
||||
# Separate model folder from file path for GGUF models
|
||||
if check_gguf_file(model):
|
||||
model = Path(model).parent
|
||||
elif is_remote_gguf(model):
|
||||
model, _ = split_remote_gguf(model)
|
||||
return get_image_processor_config(
|
||||
model, token=hf_token, revision=revision, **kwargs
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user