[Model] Support GGUF models newly added in transformers 4.46.0 (#9685)

Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
2025-01-13 08:13:44 +08:00
parent 9597a095f2
commit d14e98d924
7 changed files with 162 additions and 87 deletions
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -156,7 +156,8 @@ class LlamaAttention(nn.Module):
        )

        is_neox_style = True
-        if quant_config is not None and quant_config.get_name() == "gguf":
+        is_gguf = quant_config and quant_config.get_name() == "gguf"
+        if is_gguf and config.model_type == "llama":
            is_neox_style = False

        self.rotary_emb = get_rope(