[Model] Support GGUF models newly added in transformers 4.46.0 (#9685)

Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Author:    Isotr0py
Date:      2025-01-13 08:13:44 +08:00
Committed: GitHub
Parent:    9597a095f2
Commit:    d14e98d924

7 changed files with 162 additions and 87 deletions


@@ -198,7 +198,10 @@ class GPT2Model(nn.Module):
         assert not config.scale_attn_by_inverse_layer_idx
         assert not config.reorder_and_upcast_attn
         self.embed_dim = config.hidden_size
-        self.wte = VocabParallelEmbedding(config.vocab_size, self.embed_dim)
+        self.wte = VocabParallelEmbedding(config.vocab_size,
+                                          self.embed_dim,
+                                          quant_config=quant_config,
+                                          prefix=f"{prefix}.wte")
         self.wpe = nn.Embedding(config.max_position_embeddings, self.embed_dim)
         self.start_layer, self.end_layer, self.h = make_layers(
             config.num_hidden_layers,
@@ -259,7 +262,9 @@ class GPT2LMHeadModel(nn.Module, SupportsPP):
             self.lm_head = self.transformer.wte
         else:
             self.lm_head = ParallelLMHead(self.config.vocab_size,
-                                          self.config.hidden_size)
+                                          self.config.hidden_size,
+                                          quant_config=quant_config,
+                                          prefix=f"{prefix}.lm_head")
         self.logits_processor = LogitsProcessor(config.vocab_size)
         self.sampler = get_sampler()
         self.make_empty_intermediate_tensors = (
@@ -304,7 +309,7 @@ class GPT2LMHeadModel(nn.Module, SupportsPP):
         params_dict = dict(self.named_parameters(remove_duplicate=False))
         loaded_params: Set[str] = set()
         for name, loaded_weight in weights:
-            if "lm_head.weight" in name:
+            if name.startswith("lm_head"):
                 # GPT-2 ties the weights of the embedding layer and the final
                 # linear layer.
                 continue
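
With quant_config and a layer prefix threaded into GPT-2's embedding and LM head, a GGUF-quantized GPT-2 checkpoint can be served directly. A minimal usage sketch, not part of this commit: the local GGUF filename below is hypothetical, and an explicit tokenizer is passed because GGUF files do not bundle a full Hugging Face tokenizer config.

    from vllm import LLM, SamplingParams

    # Hypothetical local GGUF file for a GPT-2 model; point this at a real
    # quantized checkpoint (e.g. one produced by llama.cpp's convert scripts).
    llm = LLM(
        model="./gpt2.Q4_K_M.gguf",
        tokenizer="openai-community/gpt2",  # GGUF lacks an HF tokenizer config
    )

    params = SamplingParams(temperature=0.8, max_tokens=32)
    outputs = llm.generate(["Hello, my name is"], params)
    print(outputs[0].outputs[0].text)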