[Feature]: Improve the user experience of loading GGUF models from HuggingFace, e.g. repo_id:quant_type (#29137)

Signed-off-by: Injae Ryou <injaeryou@gmail.com>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Injae Ryou
2025-11-25 23:28:53 +09:00
committed by GitHub
parent 0231ce836a
commit 794029f012
10 changed files with 579 additions and 36 deletions

View File

@@ -18,7 +18,7 @@ from transformers.processing_utils import ProcessorMixin
from transformers.video_processing_utils import BaseVideoProcessor
from typing_extensions import TypeVar
from vllm.transformers_utils.utils import check_gguf_file, convert_model_repo_to_path
from vllm.transformers_utils.utils import convert_model_repo_to_path, is_gguf
from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
if TYPE_CHECKING:
@@ -236,8 +236,8 @@ def cached_processor_from_config(
processor_cls: type[_P] | tuple[type[_P], ...] = ProcessorMixin,
**kwargs: Any,
) -> _P:
if check_gguf_file(model_config.model):
assert not check_gguf_file(model_config.tokenizer), (
if is_gguf(model_config.model):
assert not is_gguf(model_config.tokenizer), (
"For multimodal GGUF models, the original tokenizer "
"should be used to correctly load processor."
)
@@ -350,8 +350,8 @@ def cached_image_processor_from_config(
model_config: "ModelConfig",
**kwargs: Any,
):
if check_gguf_file(model_config.model):
assert not check_gguf_file(model_config.tokenizer), (
if is_gguf(model_config.model):
assert not is_gguf(model_config.tokenizer), (
"For multimodal GGUF models, the original tokenizer "
"should be used to correctly load image processor."
)