[Core] Refactor GGUF parameters packing and forwarding (#8859)

This commit is contained in:
Isotr0py
2024-10-07 18:01:46 +08:00
committed by GitHub
parent 4f95ffee6f
commit f19da64871
4 changed files with 64 additions and 62 deletions

View File

@@ -512,7 +512,7 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
quant_config=quant_config,
)
if config.tie_word_embeddings:
-self.lm_head.weight = self.model.embed_tokens.weight
+self.lm_head = self.model.embed_tokens
logit_scale = getattr(config, "logit_scale", 1.0)
self.logits_processor = LogitsProcessor(self.unpadded_vocab_size,