[Bugfix] Fix LoRA weight sharding (#10450)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Author: Jee Jee Li
Date: 2024-11-24 09:23:17 +08:00
Committed by: GitHub
Parent: 17d8fc1806
Commit: 1700c543a5
7 changed files with 258 additions and 168 deletions

@@ -760,7 +760,7 @@ class ChatGLMForCausalLM(ChatGLMBaseModel, SupportsLoRA, SupportsPP,
         config = vllm_config.model_config.hf_config
         # Initialize VL
         if hasattr(config, "visual"):
-            return ChatGLM(vllm_config=vllm_config, prefix=prefix)
+            return ChatGLMV(vllm_config=vllm_config, prefix=prefix)
         # Initialize LLM
         else:
-            return ChatGLMV(vllm_config=vllm_config, prefix=prefix)
+            return ChatGLM(vllm_config=vllm_config, prefix=prefix)
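The hunk above only swaps which class each branch returns: before the fix, a config carrying a `visual` section was routed to the text-only ChatGLM class and a plain config to the vision-language ChatGLMV class. The standalone sketch below illustrates the corrected dispatch pattern; the stand-in classes and the SimpleNamespace config are toy assumptions, not the vLLM sources, and only mirror the names that appear in the diff.

from types import SimpleNamespace


class ChatGLM:
    """Toy stand-in for the text-only model."""

    def __init__(self, *, vllm_config, prefix=""):
        self.kind = "llm"


class ChatGLMV:
    """Toy stand-in for the vision-language (GLM-4V style) model."""

    def __init__(self, *, vllm_config, prefix=""):
        self.kind = "vl"


class ChatGLMForCausalLM:
    """Facade that picks the concrete model class at construction time."""

    def __new__(cls, *, vllm_config, prefix=""):
        config = vllm_config.model_config.hf_config
        # A `visual` section in the HF config marks a vision-language
        # checkpoint; its presence selects ChatGLMV, otherwise ChatGLM.
        if hasattr(config, "visual"):
            return ChatGLMV(vllm_config=vllm_config, prefix=prefix)
        return ChatGLM(vllm_config=vllm_config, prefix=prefix)


# Usage: a VL-style config (with `visual`) resolves to ChatGLMV,
# a plain config resolves to ChatGLM.
vl_cfg = SimpleNamespace(
    model_config=SimpleNamespace(hf_config=SimpleNamespace(visual={})))
llm_cfg = SimpleNamespace(
    model_config=SimpleNamespace(hf_config=SimpleNamespace()))
assert isinstance(ChatGLMForCausalLM(vllm_config=vl_cfg), ChatGLMV)
assert isinstance(ChatGLMForCausalLM(vllm_config=llm_cfg), ChatGLM)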