Separate base model from TransformersModel (#15467)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
commit cf5c8f1686 (parent 4ec2cee000)
Author: Harry Mellor
Date:   2025-03-26 10:13:38 +00:00
Committed by: GitHub
6 changed files with 110 additions and 59 deletions


@@ -45,7 +45,7 @@ def is_transformers_impl_compatible(
 def resolve_transformers_fallback(model_config: ModelConfig,
                                   architectures: list[str]):
     for i, arch in enumerate(architectures):
-        if arch == "TransformersModel":
+        if arch == "TransformersForCausalLM":
             continue
         auto_map: dict[str, str] = getattr(model_config.hf_config, "auto_map",
                                            None) or dict()
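
Note: the auto_map consulted in this hunk is the standard Hugging Face mechanism for pointing auto classes at custom code in a model repository. A minimal illustrative shape of such an entry (repository and module names are hypothetical, not taken from this commit):

# Typical auto_map from a remote-code model's config.json;
# names below are placeholders for illustration only.
auto_map = {
    "AutoConfig": "my-org/my-model--configuration_my_model.MyConfig",
    "AutoModel": "my-org/my-model--modeling_my_model.MyModel",
}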
@@ -69,7 +69,7 @@ def resolve_transformers_fallback(model_config: ModelConfig,
                 raise ValueError(
                     f"The Transformers implementation of {arch} is not "
                     "compatible with vLLM.")
-            architectures[i] = "TransformersModel"
+            architectures[i] = "TransformersForCausalLM"
         if model_config.model_impl == ModelImpl.AUTO:
             if not is_transformers_impl_compatible(arch, custom_model_module):
                 raise ValueError(
@@ -80,7 +80,7 @@ def resolve_transformers_fallback(model_config: ModelConfig,
                 "%s has no vLLM implementation, falling back to Transformers "
                 "implementation. Some features may not be supported and "
                 "performance may not be optimal.", arch)
-            architectures[i] = "TransformersModel"
+            architectures[i] = "TransformersForCausalLM"
     return architectures
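
After this rename, forcing the Transformers implementation resolves a custom architecture to TransformersForCausalLM rather than the old TransformersModel. A minimal usage sketch of the code path touched here, assuming vLLM's model_impl engine argument (the model name is illustrative):

from vllm import LLM

# Ask vLLM to use the Transformers modeling code; resolve_transformers_fallback
# then rewrites the architecture entry to "TransformersForCausalLM".
# The model name below is a placeholder, not a real checkpoint.
llm = LLM(model="my-org/my-custom-model", model_impl="transformers")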