[Model] Support GGUF models newly added in transformers 4.46.0 (#9685)
Signed-off-by: Isotr0py <2037008807@qq.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
@@ -156,7 +156,8 @@ class LlamaAttention(nn.Module):
|
||||
)
|
||||
|
||||
is_neox_style = True
|
||||
if quant_config is not None and quant_config.get_name() == "gguf":
|
||||
is_gguf = quant_config and quant_config.get_name() == "gguf"
|
||||
if is_gguf and config.model_type == "llama":
|
||||
is_neox_style = False
|
||||
|
||||
self.rotary_emb = get_rope(
|
||||
|
||||
Reference in New Issue
Block a user