Remove `padding_idx` from models that don't use it, for better Transformers v5 compatibility (#35189)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -421,7 +421,6 @@ class Ernie4_5_MoeModel(nn.Module):
|
||||
cache_config = vllm_config.cache_config
|
||||
quant_config = vllm_config.quant_config
|
||||
|
||||
self.padding_idx = config.pad_token_id
|
||||
self.vocab_size = config.vocab_size
|
||||
self.config = config
|
||||
parallel_config = vllm_config.parallel_config
|
||||
|
||||
@@ -523,7 +523,6 @@ class Ernie4_5_VLMoeModel(nn.Module):
|
||||
cache_config = vllm_config.cache_config
|
||||
quant_config = vllm_config.quant_config
|
||||
|
||||
self.padding_idx = config.pad_token_id
|
||||
self.vocab_size = config.vocab_size
|
||||
self.config = config
|
||||
|
||||
|
||||
@@ -157,7 +157,6 @@ class GraniteMoeSharedModel(nn.Module):
|
||||
|
||||
self.config = config
|
||||
self.quant_config = quant_config # Required by MixtralModel
|
||||
self.padding_idx = config.pad_token_id
|
||||
|
||||
self.vocab_size = config.vocab_size
|
||||
|
||||
|
||||
@@ -451,7 +451,6 @@ class Grok1Model(nn.Module):
|
||||
|
||||
self.config = config
|
||||
self.quant_config = quant_config
|
||||
self.padding_idx = config.pad_token_id
|
||||
|
||||
# Store expert naming for weight loading
|
||||
self.ckpt_gate_proj_name = ckpt_gate_proj_name
|
||||
|
||||
@@ -600,7 +600,6 @@ class HunYuanModel(nn.Module):
|
||||
|
||||
self.config = config
|
||||
self.quant_config = quant_config
|
||||
self.padding_idx = config.pad_token_id
|
||||
|
||||
self.vocab_size = config.vocab_size
|
||||
|
||||
|
||||
@@ -305,7 +305,6 @@ class Jais2Model(nn.Module):
|
||||
|
||||
self.config = config
|
||||
self.quant_config = quant_config
|
||||
self.padding_idx = config.pad_token_id
|
||||
|
||||
self.vocab_size = config.vocab_size
|
||||
self.org_vocab_size = config.vocab_size
|
||||
|
||||
@@ -393,7 +393,6 @@ class KimiLinearModel(nn.Module):
|
||||
parallel_config = vllm_config.parallel_config
|
||||
self.config = config
|
||||
|
||||
self.padding_idx = config.pad_token_id
|
||||
self.vocab_size = config.vocab_size
|
||||
|
||||
if get_pp_group().is_first_rank:
|
||||
|
||||
@@ -486,7 +486,6 @@ class FlashModel(nn.Module):
|
||||
quant_config = vllm_config.quant_config
|
||||
self.config = config
|
||||
|
||||
self.padding_idx = getattr(config, "pad_token_id", None)
|
||||
self.vocab_size = config.vocab_size
|
||||
|
||||
if get_pp_group().is_first_rank:
|
||||
|
||||
@@ -495,7 +495,6 @@ class MiniMaxText01Model(nn.Module):
|
||||
cache_config = vllm_config.cache_config
|
||||
scheduler_config = vllm_config.scheduler_config
|
||||
|
||||
self.padding_idx = config.pad_token_id
|
||||
self.vocab_size = config.vocab_size
|
||||
|
||||
self.decoder_attention_types = getattr(
|
||||
|
||||
@@ -241,7 +241,6 @@ class DeciModel(nn.Module):
|
||||
|
||||
self.config = config
|
||||
self.quant_config = quant_config
|
||||
self.padding_idx = config.pad_token_id
|
||||
|
||||
self.vocab_size = config.vocab_size
|
||||
|
||||
|
||||
@@ -1029,7 +1029,6 @@ class OpenPanguModel(nn.Module):
|
||||
self.config = config
|
||||
self.num_redundant_experts = eplb_config.num_redundant_experts
|
||||
|
||||
self.padding_idx = config.pad_token_id
|
||||
self.vocab_size = config.vocab_size
|
||||
|
||||
if get_pp_group().is_first_rank or (
|
||||
|
||||
@@ -748,7 +748,6 @@ class Plamo2Model(torch.nn.Module):
|
||||
config = vllm_config.model_config.hf_config
|
||||
|
||||
self.config = config
|
||||
self.padding_idx = config.pad_token_id
|
||||
self.vocab_size = config.vocab_size
|
||||
|
||||
self.embed_tokens = VocabParallelEmbedding(
|
||||
|
||||
@@ -317,7 +317,6 @@ class Plamo3Model(nn.Module):
|
||||
config = vllm_config.model_config.hf_config
|
||||
|
||||
self.config = config
|
||||
self.padding_idx = config.pad_token_id
|
||||
self.vocab_size = config.vocab_size
|
||||
self.org_vocab_size = config.vocab_size
|
||||
|
||||
|
||||
@@ -443,7 +443,6 @@ class Qwen3MoeModel(nn.Module):
|
||||
eplb_config = parallel_config.eplb_config
|
||||
self.num_redundant_experts = eplb_config.num_redundant_experts
|
||||
|
||||
self.padding_idx = config.pad_token_id
|
||||
self.vocab_size = config.vocab_size
|
||||
self.config = config
|
||||
self.quant_config = quant_config
|
||||
|
||||
Reference in New Issue
Block a user