diff --git a/vllm/model_executor/models/ernie45_moe.py b/vllm/model_executor/models/ernie45_moe.py index 452c7624d..f038cfb21 100644 --- a/vllm/model_executor/models/ernie45_moe.py +++ b/vllm/model_executor/models/ernie45_moe.py @@ -421,7 +421,6 @@ class Ernie4_5_MoeModel(nn.Module): cache_config = vllm_config.cache_config quant_config = vllm_config.quant_config - self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size self.config = config parallel_config = vllm_config.parallel_config diff --git a/vllm/model_executor/models/ernie45_vl_moe.py b/vllm/model_executor/models/ernie45_vl_moe.py index 9d3cbbecf..376de71ad 100644 --- a/vllm/model_executor/models/ernie45_vl_moe.py +++ b/vllm/model_executor/models/ernie45_vl_moe.py @@ -523,7 +523,6 @@ class Ernie4_5_VLMoeModel(nn.Module): cache_config = vllm_config.cache_config quant_config = vllm_config.quant_config - self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size self.config = config diff --git a/vllm/model_executor/models/granitemoeshared.py b/vllm/model_executor/models/granitemoeshared.py index 93e869814..7abc682c5 100644 --- a/vllm/model_executor/models/granitemoeshared.py +++ b/vllm/model_executor/models/granitemoeshared.py @@ -157,7 +157,6 @@ class GraniteMoeSharedModel(nn.Module): self.config = config self.quant_config = quant_config # Required by MixtralModel - self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size diff --git a/vllm/model_executor/models/grok1.py b/vllm/model_executor/models/grok1.py index e2943b797..0bd6a8f3d 100644 --- a/vllm/model_executor/models/grok1.py +++ b/vllm/model_executor/models/grok1.py @@ -451,7 +451,6 @@ class Grok1Model(nn.Module): self.config = config self.quant_config = quant_config - self.padding_idx = config.pad_token_id # Store expert naming for weight loading self.ckpt_gate_proj_name = ckpt_gate_proj_name diff --git a/vllm/model_executor/models/hunyuan_v1.py b/vllm/model_executor/models/hunyuan_v1.py index a07bea16c..584645f1f 100644 --- a/vllm/model_executor/models/hunyuan_v1.py +++ b/vllm/model_executor/models/hunyuan_v1.py @@ -600,7 +600,6 @@ class HunYuanModel(nn.Module): self.config = config self.quant_config = quant_config - self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size diff --git a/vllm/model_executor/models/jais2.py b/vllm/model_executor/models/jais2.py index ea06ee1b1..4e03eb12e 100644 --- a/vllm/model_executor/models/jais2.py +++ b/vllm/model_executor/models/jais2.py @@ -305,7 +305,6 @@ class Jais2Model(nn.Module): self.config = config self.quant_config = quant_config - self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size self.org_vocab_size = config.vocab_size diff --git a/vllm/model_executor/models/kimi_linear.py b/vllm/model_executor/models/kimi_linear.py index 1793397e1..e36ff0227 100644 --- a/vllm/model_executor/models/kimi_linear.py +++ b/vllm/model_executor/models/kimi_linear.py @@ -393,7 +393,6 @@ class KimiLinearModel(nn.Module): parallel_config = vllm_config.parallel_config self.config = config - self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size if get_pp_group().is_first_rank: diff --git a/vllm/model_executor/models/longcat_flash.py b/vllm/model_executor/models/longcat_flash.py index 32408e7c3..c90cc2d39 100644 --- a/vllm/model_executor/models/longcat_flash.py +++ b/vllm/model_executor/models/longcat_flash.py @@ -486,7 +486,6 @@ class FlashModel(nn.Module): quant_config = vllm_config.quant_config self.config = config - self.padding_idx = getattr(config, "pad_token_id", None) self.vocab_size = config.vocab_size if get_pp_group().is_first_rank: diff --git a/vllm/model_executor/models/minimax_text_01.py b/vllm/model_executor/models/minimax_text_01.py index a7785bcfc..80c0342cc 100644 --- a/vllm/model_executor/models/minimax_text_01.py +++ b/vllm/model_executor/models/minimax_text_01.py @@ -495,7 +495,6 @@ class MiniMaxText01Model(nn.Module): cache_config = vllm_config.cache_config scheduler_config = vllm_config.scheduler_config - self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size self.decoder_attention_types = getattr( diff --git a/vllm/model_executor/models/nemotron_nas.py b/vllm/model_executor/models/nemotron_nas.py index 6d796a5b2..f2f3811c0 100644 --- a/vllm/model_executor/models/nemotron_nas.py +++ b/vllm/model_executor/models/nemotron_nas.py @@ -241,7 +241,6 @@ class DeciModel(nn.Module): self.config = config self.quant_config = quant_config - self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size diff --git a/vllm/model_executor/models/openpangu.py b/vllm/model_executor/models/openpangu.py index 04cdc5b6b..994ae8252 100644 --- a/vllm/model_executor/models/openpangu.py +++ b/vllm/model_executor/models/openpangu.py @@ -1029,7 +1029,6 @@ class OpenPanguModel(nn.Module): self.config = config self.num_redundant_experts = eplb_config.num_redundant_experts - self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size if get_pp_group().is_first_rank or ( diff --git a/vllm/model_executor/models/plamo2.py b/vllm/model_executor/models/plamo2.py index 68f0b9550..f8fff2ccb 100644 --- a/vllm/model_executor/models/plamo2.py +++ b/vllm/model_executor/models/plamo2.py @@ -748,7 +748,6 @@ class Plamo2Model(torch.nn.Module): config = vllm_config.model_config.hf_config self.config = config - self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size self.embed_tokens = VocabParallelEmbedding( diff --git a/vllm/model_executor/models/plamo3.py b/vllm/model_executor/models/plamo3.py index 4ba51898d..1accc0541 100644 --- a/vllm/model_executor/models/plamo3.py +++ b/vllm/model_executor/models/plamo3.py @@ -317,7 +317,6 @@ class Plamo3Model(nn.Module): config = vllm_config.model_config.hf_config self.config = config - self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size self.org_vocab_size = config.vocab_size diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py index eba4b0f5f..f9da9248e 100644 --- a/vllm/model_executor/models/qwen3_moe.py +++ b/vllm/model_executor/models/qwen3_moe.py @@ -443,7 +443,6 @@ class Qwen3MoeModel(nn.Module): eplb_config = parallel_config.eplb_config self.num_redundant_experts = eplb_config.num_redundant_experts - self.padding_idx = config.pad_token_id self.vocab_size = config.vocab_size self.config = config self.quant_config = quant_config