diff --git a/docs/contributing/model/basic.md b/docs/contributing/model/basic.md index 915fe1495..e2f560815 100644 --- a/docs/contributing/model/basic.md +++ b/docs/contributing/model/basic.md @@ -71,7 +71,7 @@ class MyModel(nn.Module): ```python def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py b/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py index b99c9629a..b7e69a147 100644 --- a/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py +++ b/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py @@ -36,7 +36,7 @@ class MyGemma2Embedding(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/afmoe.py b/vllm/model_executor/models/afmoe.py index ef6f59e44..00605fdc6 100644 --- a/vllm/model_executor/models/afmoe.py +++ b/vllm/model_executor/models/afmoe.py @@ -425,7 +425,7 @@ class AfmoeModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -675,7 +675,7 @@ class AfmoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/apertus.py b/vllm/model_executor/models/apertus.py index b1b6cdd81..3ae501610 100644 --- a/vllm/model_executor/models/apertus.py +++ b/vllm/model_executor/models/apertus.py @@ -542,7 +542,7 @@ class ApertusForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/arcee.py b/vllm/model_executor/models/arcee.py index 5616ffee6..ef3a4d4c3 100644 --- a/vllm/model_executor/models/arcee.py +++ b/vllm/model_executor/models/arcee.py @@ -394,7 +394,7 @@ class ArceeForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/arctic.py b/vllm/model_executor/models/arctic.py index fec672c02..cf93d2eb6 100644 --- a/vllm/model_executor/models/arctic.py +++ b/vllm/model_executor/models/arctic.py @@ -406,7 +406,7 @@ class ArcticModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -460,7 +460,7 @@ class ArcticForCausalLM(nn.Module, SupportsPP, SupportsQuant): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/aria.py b/vllm/model_executor/models/aria.py index 2c192c7d9..b8e742362 100644 --- a/vllm/model_executor/models/aria.py +++ b/vllm/model_executor/models/aria.py @@ -629,7 +629,7 @@ class AriaForConditionalGeneration(nn.Module, SupportsMultiModal): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/audioflamingo3.py b/vllm/model_executor/models/audioflamingo3.py index 5228ca70f..6c5e13f42 100644 --- a/vllm/model_executor/models/audioflamingo3.py +++ b/vllm/model_executor/models/audioflamingo3.py @@ -609,7 +609,7 @@ class AudioFlamingo3ForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/aya_vision.py b/vllm/model_executor/models/aya_vision.py index 99184f2b2..bdf3f86c4 100644 --- a/vllm/model_executor/models/aya_vision.py +++ b/vllm/model_executor/models/aya_vision.py @@ -420,7 +420,7 @@ class AyaVisionForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsP def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/bagel.py b/vllm/model_executor/models/bagel.py index 0d28a9a53..ac16538e9 100644 --- a/vllm/model_executor/models/bagel.py +++ b/vllm/model_executor/models/bagel.py @@ -507,7 +507,7 @@ class BagelForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/baichuan.py b/vllm/model_executor/models/baichuan.py index ee4a1dbd6..1e0f27ec7 100644 --- a/vllm/model_executor/models/baichuan.py +++ b/vllm/model_executor/models/baichuan.py @@ -311,7 +311,7 @@ class BaiChuanModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -428,7 +428,7 @@ class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/bailing_moe.py b/vllm/model_executor/models/bailing_moe.py index e1e675bd5..a8ee14aa0 100644 --- a/vllm/model_executor/models/bailing_moe.py +++ b/vllm/model_executor/models/bailing_moe.py @@ -440,7 +440,7 @@ class BailingMoeModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, position_ids: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -611,7 +611,7 @@ class BailingMoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/bamba.py b/vllm/model_executor/models/bamba.py index a7de8e7cf..77f49eb65 100644 --- a/vllm/model_executor/models/bamba.py +++ b/vllm/model_executor/models/bamba.py @@ -311,7 +311,7 @@ class BambaModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -493,7 +493,7 @@ class BambaForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/bert_with_rope.py b/vllm/model_executor/models/bert_with_rope.py index 02950dc9e..f200f791c 100644 --- a/vllm/model_executor/models/bert_with_rope.py +++ b/vllm/model_executor/models/bert_with_rope.py @@ -475,7 +475,7 @@ class BertWithRope(nn.Module, SupportsQuant): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/blip2.py b/vllm/model_executor/models/blip2.py index 38d809a9c..0441996f6 100644 --- a/vllm/model_executor/models/blip2.py +++ b/vllm/model_executor/models/blip2.py @@ -641,7 +641,7 @@ class Blip2ForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/bloom.py b/vllm/model_executor/models/bloom.py index 507fbf1fd..c6056329f 100644 --- a/vllm/model_executor/models/bloom.py +++ b/vllm/model_executor/models/bloom.py @@ -276,7 +276,7 @@ class BloomModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, position_ids: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -358,7 +358,7 @@ class BloomForCausalLM(nn.Module, SupportsPP, SupportsQuant): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/chameleon.py b/vllm/model_executor/models/chameleon.py index 339ecaeb7..c6c48a821 100644 --- a/vllm/model_executor/models/chameleon.py +++ b/vllm/model_executor/models/chameleon.py @@ -994,7 +994,7 @@ class ChameleonForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/chatglm.py b/vllm/model_executor/models/chatglm.py index 26181d1c9..ea4f87d97 100644 --- a/vllm/model_executor/models/chatglm.py +++ b/vllm/model_executor/models/chatglm.py @@ -362,7 +362,7 @@ class ChatGLMModel(nn.Module, SupportsQuant): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -491,7 +491,7 @@ class ChatGLMForCausalLM(ChatGLMBaseModel, SupportsLoRA, SupportsPP, SupportsQua def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/cohere2_vision.py b/vllm/model_executor/models/cohere2_vision.py index 6dd98c135..0cf2b6ba8 100644 --- a/vllm/model_executor/models/cohere2_vision.py +++ b/vllm/model_executor/models/cohere2_vision.py @@ -446,7 +446,7 @@ class Cohere2VisionForConditionalGeneration(nn.Module, SupportsMultiModal, Suppo def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/commandr.py b/vllm/model_executor/models/commandr.py index 63a93eaa2..609512078 100644 --- a/vllm/model_executor/models/commandr.py +++ b/vllm/model_executor/models/commandr.py @@ -312,7 +312,7 @@ class CohereModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -438,7 +438,7 @@ class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant): @torch.no_grad() def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/dbrx.py b/vllm/model_executor/models/dbrx.py index db4fe61b0..8cef4b428 100644 --- a/vllm/model_executor/models/dbrx.py +++ b/vllm/model_executor/models/dbrx.py @@ -361,7 +361,7 @@ class DbrxModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, position_ids: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -462,7 +462,7 @@ class DbrxForCausalLM(nn.Module, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/deepseek_mtp.py b/vllm/model_executor/models/deepseek_mtp.py index 8fb2bfb16..cabc46983 100644 --- a/vllm/model_executor/models/deepseek_mtp.py +++ b/vllm/model_executor/models/deepseek_mtp.py @@ -213,7 +213,7 @@ class DeepSeekMTP(nn.Module, DeepseekV2MixtureOfExperts): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, hidden_states: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, diff --git a/vllm/model_executor/models/deepseek_ocr.py b/vllm/model_executor/models/deepseek_ocr.py index bfbf06467..570ab5484 100644 --- a/vllm/model_executor/models/deepseek_ocr.py +++ b/vllm/model_executor/models/deepseek_ocr.py @@ -562,7 +562,7 @@ class DeepseekOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, Supports def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index c8b6533dc..5649f8bd1 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -1085,7 +1085,7 @@ class DeepseekV2Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -1255,7 +1255,7 @@ class DeepseekV2ForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/deepseek_vl2.py b/vllm/model_executor/models/deepseek_vl2.py index 35700ae95..cb98640ce 100644 --- a/vllm/model_executor/models/deepseek_vl2.py +++ b/vllm/model_executor/models/deepseek_vl2.py @@ -614,7 +614,7 @@ class DeepseekVLV2ForCausalLM(nn.Module, SupportsMultiModal, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/dots1.py b/vllm/model_executor/models/dots1.py index b64f16376..b69d87f1e 100644 --- a/vllm/model_executor/models/dots1.py +++ b/vllm/model_executor/models/dots1.py @@ -394,7 +394,7 @@ class Dots1Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -538,7 +538,7 @@ class Dots1ForCausalLM(nn.Module, SupportsPP, SupportsLoRA): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/dots_ocr.py b/vllm/model_executor/models/dots_ocr.py index c44992a9a..fa5a5cc7f 100644 --- a/vllm/model_executor/models/dots_ocr.py +++ b/vllm/model_executor/models/dots_ocr.py @@ -754,7 +754,7 @@ class DotsOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/eagle2_5_vl.py b/vllm/model_executor/models/eagle2_5_vl.py index 419522ade..3ce9b9c4d 100644 --- a/vllm/model_executor/models/eagle2_5_vl.py +++ b/vllm/model_executor/models/eagle2_5_vl.py @@ -432,7 +432,7 @@ class Eagle2_5_VLForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -440,7 +440,6 @@ class Eagle2_5_VLForConditionalGeneration( ) -> IntermediateTensors: """Forward pass through the model.""" if intermediate_tensors is not None: - input_ids = None inputs_embeds = None forward_kwargs = { diff --git a/vllm/model_executor/models/ernie45_moe.py b/vllm/model_executor/models/ernie45_moe.py index 34da3e7c7..be153dbcf 100644 --- a/vllm/model_executor/models/ernie45_moe.py +++ b/vllm/model_executor/models/ernie45_moe.py @@ -466,7 +466,7 @@ class Ernie4_5_MoeModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -728,7 +728,7 @@ class Ernie4_5_MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA, MixtureOfExpe def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/ernie45_vl.py b/vllm/model_executor/models/ernie45_vl.py index 869a9a458..85c447c9b 100644 --- a/vllm/model_executor/models/ernie45_vl.py +++ b/vllm/model_executor/models/ernie45_vl.py @@ -1650,7 +1650,7 @@ class Ernie4_5_VLMoeForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/ernie45_vl_moe.py b/vllm/model_executor/models/ernie45_vl_moe.py index 2be22e0e3..63da84d69 100644 --- a/vllm/model_executor/models/ernie45_vl_moe.py +++ b/vllm/model_executor/models/ernie45_vl_moe.py @@ -565,7 +565,7 @@ class Ernie4_5_VLMoeModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -646,7 +646,7 @@ class Ernie4_5_VLMoeForCausalLM(nn.Module, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/ernie_mtp.py b/vllm/model_executor/models/ernie_mtp.py index 05c4277b1..ef37fd355 100644 --- a/vllm/model_executor/models/ernie_mtp.py +++ b/vllm/model_executor/models/ernie_mtp.py @@ -164,7 +164,7 @@ class ErnieMTP(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, hidden_states: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, diff --git a/vllm/model_executor/models/exaone.py b/vllm/model_executor/models/exaone.py index 039e7cf68..6cafbfb57 100644 --- a/vllm/model_executor/models/exaone.py +++ b/vllm/model_executor/models/exaone.py @@ -496,7 +496,7 @@ class ExaoneForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/exaone4.py b/vllm/model_executor/models/exaone4.py index cff82396a..5b2ef9082 100644 --- a/vllm/model_executor/models/exaone4.py +++ b/vllm/model_executor/models/exaone4.py @@ -490,7 +490,7 @@ class Exaone4ForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/exaone_moe.py b/vllm/model_executor/models/exaone_moe.py index ccff419a4..d7282edcf 100644 --- a/vllm/model_executor/models/exaone_moe.py +++ b/vllm/model_executor/models/exaone_moe.py @@ -549,7 +549,7 @@ class ExaoneMoeForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/falcon.py b/vllm/model_executor/models/falcon.py index 7cdfcae0e..77cb68c4c 100644 --- a/vllm/model_executor/models/falcon.py +++ b/vllm/model_executor/models/falcon.py @@ -402,7 +402,7 @@ class FalconModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/falcon_h1.py b/vllm/model_executor/models/falcon_h1.py index 49722b6d7..582f1d244 100644 --- a/vllm/model_executor/models/falcon_h1.py +++ b/vllm/model_executor/models/falcon_h1.py @@ -459,7 +459,7 @@ class FalconH1Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -602,7 +602,7 @@ class FalconH1ForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/fuyu.py b/vllm/model_executor/models/fuyu.py index 0733c2f51..50708f4b9 100644 --- a/vllm/model_executor/models/fuyu.py +++ b/vllm/model_executor/models/fuyu.py @@ -340,7 +340,7 @@ class FuyuForCausalLM(nn.Module, SupportsMultiModal, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/gemma.py b/vllm/model_executor/models/gemma.py index 7304a7280..4d016f286 100644 --- a/vllm/model_executor/models/gemma.py +++ b/vllm/model_executor/models/gemma.py @@ -297,7 +297,7 @@ class GemmaModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -400,7 +400,7 @@ class GemmaForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/gemma2.py b/vllm/model_executor/models/gemma2.py index fe6ec5ff8..6d946522f 100644 --- a/vllm/model_executor/models/gemma2.py +++ b/vllm/model_executor/models/gemma2.py @@ -410,7 +410,7 @@ class Gemma2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/gemma3.py b/vllm/model_executor/models/gemma3.py index c8a0ba8c9..502fe6b82 100644 --- a/vllm/model_executor/models/gemma3.py +++ b/vllm/model_executor/models/gemma3.py @@ -494,7 +494,7 @@ class Gemma3ForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/gemma3_mm.py b/vllm/model_executor/models/gemma3_mm.py index 03aebde83..aa44582ed 100644 --- a/vllm/model_executor/models/gemma3_mm.py +++ b/vllm/model_executor/models/gemma3_mm.py @@ -618,7 +618,7 @@ class Gemma3ForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/gemma3n.py b/vllm/model_executor/models/gemma3n.py index 4d446f51c..bdbb3c91e 100644 --- a/vllm/model_executor/models/gemma3n.py +++ b/vllm/model_executor/models/gemma3n.py @@ -704,7 +704,7 @@ class Gemma3nSelfDecoder(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, inputs_embeds: torch.Tensor | None = None, per_layer_inputs: torch.Tensor | None = None, @@ -887,7 +887,7 @@ class Gemma3nTextModel(nn.Module, SupportsQuant): def fast_prefill_forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, inputs_embeds: torch.Tensor | None = None, per_layer_inputs: torch.Tensor | None = None, @@ -964,7 +964,7 @@ class Gemma3nTextModel(nn.Module, SupportsQuant): def normal_forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, inputs_embeds: torch.Tensor | None = None, per_layer_inputs: torch.Tensor | None = None, @@ -1131,7 +1131,7 @@ class Gemma3nForCausalLM(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, *, per_layer_inputs: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/gemma3n_mm.py b/vllm/model_executor/models/gemma3n_mm.py index be520d117..2e5287220 100644 --- a/vllm/model_executor/models/gemma3n_mm.py +++ b/vllm/model_executor/models/gemma3n_mm.py @@ -707,7 +707,7 @@ class Gemma3nForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/glm4.py b/vllm/model_executor/models/glm4.py index 06da2a8b3..d3f089561 100644 --- a/vllm/model_executor/models/glm4.py +++ b/vllm/model_executor/models/glm4.py @@ -270,7 +270,7 @@ class Glm4ForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py index 5db7a18f6..b6b28e5c5 100644 --- a/vllm/model_executor/models/glm4_1v.py +++ b/vllm/model_executor/models/glm4_1v.py @@ -1711,7 +1711,7 @@ class Glm4vForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py index efa6c1cfe..4c60cd460 100644 --- a/vllm/model_executor/models/glm4_moe.py +++ b/vllm/model_executor/models/glm4_moe.py @@ -451,7 +451,7 @@ class Glm4MoeModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -687,7 +687,7 @@ class Glm4MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA, Glm4MixtureOfExper def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/glm4_moe_lite.py b/vllm/model_executor/models/glm4_moe_lite.py index b8ad273a1..481992969 100644 --- a/vllm/model_executor/models/glm4_moe_lite.py +++ b/vllm/model_executor/models/glm4_moe_lite.py @@ -264,7 +264,7 @@ class Glm4MoeLiteModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -596,7 +596,7 @@ class Glm4MoeLiteForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/glm4_moe_lite_mtp.py b/vllm/model_executor/models/glm4_moe_lite_mtp.py index c1e365e19..efa96c40d 100644 --- a/vllm/model_executor/models/glm4_moe_lite_mtp.py +++ b/vllm/model_executor/models/glm4_moe_lite_mtp.py @@ -230,7 +230,7 @@ class Glm4MoeLiteMTP(nn.Module, SupportsPP, Glm4MixtureOfExperts): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, hidden_states: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, diff --git a/vllm/model_executor/models/glm4_moe_mtp.py b/vllm/model_executor/models/glm4_moe_mtp.py index fbbaa5214..cde94673e 100644 --- a/vllm/model_executor/models/glm4_moe_mtp.py +++ b/vllm/model_executor/models/glm4_moe_mtp.py @@ -216,7 +216,7 @@ class Glm4MoeMTP(nn.Module, Glm4MixtureOfExperts): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, hidden_states: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, diff --git a/vllm/model_executor/models/glm4v.py b/vllm/model_executor/models/glm4v.py index fca7c49cc..59ba0fccc 100644 --- a/vllm/model_executor/models/glm4v.py +++ b/vllm/model_executor/models/glm4v.py @@ -769,7 +769,7 @@ class GLM4VForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/glmasr.py b/vllm/model_executor/models/glmasr.py index d17fe7fcb..a14aa47ed 100644 --- a/vllm/model_executor/models/glmasr.py +++ b/vllm/model_executor/models/glmasr.py @@ -1075,7 +1075,7 @@ class GlmAsrForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/gpt2.py b/vllm/model_executor/models/gpt2.py index bacf30d12..4026d69cd 100644 --- a/vllm/model_executor/models/gpt2.py +++ b/vllm/model_executor/models/gpt2.py @@ -218,7 +218,7 @@ class GPT2Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, position_ids: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None, @@ -298,7 +298,7 @@ class GPT2LMHeadModel(nn.Module, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -362,7 +362,7 @@ class GPT2ForSequenceClassification(nn.Module, SupportsCrossEncoding): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/gpt_bigcode.py b/vllm/model_executor/models/gpt_bigcode.py index a405fd184..8af41d004 100644 --- a/vllm/model_executor/models/gpt_bigcode.py +++ b/vllm/model_executor/models/gpt_bigcode.py @@ -235,7 +235,7 @@ class GPTBigCodeModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, position_ids: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -311,7 +311,7 @@ class GPTBigCodeForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/gpt_j.py b/vllm/model_executor/models/gpt_j.py index f32ac2639..366f0fd90 100644 --- a/vllm/model_executor/models/gpt_j.py +++ b/vllm/model_executor/models/gpt_j.py @@ -220,7 +220,7 @@ class GPTJModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, position_ids: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -324,7 +324,7 @@ class GPTJForCausalLM(nn.Module, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/gpt_neox.py b/vllm/model_executor/models/gpt_neox.py index d994e380d..764a801db 100644 --- a/vllm/model_executor/models/gpt_neox.py +++ b/vllm/model_executor/models/gpt_neox.py @@ -230,7 +230,7 @@ class GPTNeoXModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, position_ids: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -318,7 +318,7 @@ class GPTNeoXForCausalLM(nn.Module, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/gpt_oss.py b/vllm/model_executor/models/gpt_oss.py index 69678188a..acaf099ed 100644 --- a/vllm/model_executor/models/gpt_oss.py +++ b/vllm/model_executor/models/gpt_oss.py @@ -275,7 +275,7 @@ class GptOssModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -714,7 +714,7 @@ class GptOssForCausalLM(nn.Module, SupportsPP, SupportsEagle3, SupportsLoRA): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/granite.py b/vllm/model_executor/models/granite.py index 82c945f5a..53c3230b2 100644 --- a/vllm/model_executor/models/granite.py +++ b/vllm/model_executor/models/granite.py @@ -437,7 +437,7 @@ class GraniteForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/granite_speech.py b/vllm/model_executor/models/granite_speech.py index ae9a6a211..22296bf59 100644 --- a/vllm/model_executor/models/granite_speech.py +++ b/vllm/model_executor/models/granite_speech.py @@ -806,7 +806,7 @@ class GraniteSpeechForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/granitemoe.py b/vllm/model_executor/models/granitemoe.py index 237fabff9..33446e744 100644 --- a/vllm/model_executor/models/granitemoe.py +++ b/vllm/model_executor/models/granitemoe.py @@ -312,7 +312,7 @@ class GraniteMoeModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -528,7 +528,7 @@ class GraniteMoeForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/granitemoehybrid.py b/vllm/model_executor/models/granitemoehybrid.py index 0b601b4b8..b6a3c4872 100644 --- a/vllm/model_executor/models/granitemoehybrid.py +++ b/vllm/model_executor/models/granitemoehybrid.py @@ -368,7 +368,7 @@ class GraniteMoeHybridModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -685,7 +685,7 @@ class GraniteMoeHybridForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/granitemoeshared.py b/vllm/model_executor/models/granitemoeshared.py index 8ad5a7105..93e869814 100644 --- a/vllm/model_executor/models/granitemoeshared.py +++ b/vllm/model_executor/models/granitemoeshared.py @@ -183,7 +183,7 @@ class GraniteMoeSharedModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -295,7 +295,7 @@ class GraniteMoeSharedForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/grok1.py b/vllm/model_executor/models/grok1.py index 43c658a2c..49bdc0241 100644 --- a/vllm/model_executor/models/grok1.py +++ b/vllm/model_executor/models/grok1.py @@ -491,7 +491,7 @@ class Grok1Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -705,7 +705,7 @@ class GrokBaseForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/hunyuan_v1.py b/vllm/model_executor/models/hunyuan_v1.py index 1cf6e824f..df507a234 100644 --- a/vllm/model_executor/models/hunyuan_v1.py +++ b/vllm/model_executor/models/hunyuan_v1.py @@ -938,7 +938,7 @@ class HunyuanV1ModelBase(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/hunyuan_vision.py b/vllm/model_executor/models/hunyuan_vision.py index a4e309ee9..9214f47c7 100644 --- a/vllm/model_executor/models/hunyuan_vision.py +++ b/vllm/model_executor/models/hunyuan_vision.py @@ -968,7 +968,7 @@ class HunYuanVLForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None, diff --git a/vllm/model_executor/models/hyperclovax_vision.py b/vllm/model_executor/models/hyperclovax_vision.py index 062ad2eb3..6a1f58af2 100644 --- a/vllm/model_executor/models/hyperclovax_vision.py +++ b/vllm/model_executor/models/hyperclovax_vision.py @@ -747,7 +747,7 @@ class HCXVisionForCausalLM(nn.Module, SupportsMultiModal, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/idefics3.py b/vllm/model_executor/models/idefics3.py index fc88f07be..dfd5869b4 100644 --- a/vllm/model_executor/models/idefics3.py +++ b/vllm/model_executor/models/idefics3.py @@ -559,7 +559,7 @@ class Idefics3Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -689,7 +689,7 @@ class Idefics3ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsLo def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py index bd507c398..65588ac33 100644 --- a/vllm/model_executor/models/interfaces.py +++ b/vllm/model_executor/models/interfaces.py @@ -603,6 +603,8 @@ class SupportsPP(Protocol): def forward( self, + input_ids: Tensor | None, + positions: Tensor, *, intermediate_tensors: IntermediateTensors | None, ) -> IntermediateTensors | None: @@ -631,6 +633,8 @@ class _SupportsPPType(Protocol): def forward( self, + input_ids: Tensor | None, + positions: Tensor, *, intermediate_tensors: IntermediateTensors | None, ) -> Tensor | IntermediateTensors: ... diff --git a/vllm/model_executor/models/internlm2.py b/vllm/model_executor/models/internlm2.py index 5dec47e09..60db4c4c6 100644 --- a/vllm/model_executor/models/internlm2.py +++ b/vllm/model_executor/models/internlm2.py @@ -284,7 +284,7 @@ class InternLM2Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -350,7 +350,7 @@ class InternLM2ForCausalLM(nn.Module, SupportsPP, SupportsLoRA): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -446,7 +446,7 @@ class InternLM2ForRewardModel(InternLM2ForCausalLM): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/internlm2_ve.py b/vllm/model_executor/models/internlm2_ve.py index a57db8224..da0dfe73e 100644 --- a/vllm/model_executor/models/internlm2_ve.py +++ b/vllm/model_executor/models/internlm2_ve.py @@ -101,7 +101,7 @@ class InternLM2VEModel(InternLM2Model): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/interns1.py b/vllm/model_executor/models/interns1.py index 13479e306..de306341c 100644 --- a/vllm/model_executor/models/interns1.py +++ b/vllm/model_executor/models/interns1.py @@ -782,7 +782,7 @@ class InternS1ForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/internvl.py b/vllm/model_executor/models/internvl.py index 777c4274e..ed6060fa2 100644 --- a/vllm/model_executor/models/internvl.py +++ b/vllm/model_executor/models/internvl.py @@ -1371,7 +1371,7 @@ class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA) def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/iquest_loopcoder.py b/vllm/model_executor/models/iquest_loopcoder.py index 1901cc6e8..9dd6a08d1 100644 --- a/vllm/model_executor/models/iquest_loopcoder.py +++ b/vllm/model_executor/models/iquest_loopcoder.py @@ -438,7 +438,7 @@ class IQuestLoopCoderModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -570,7 +570,7 @@ class IQuestLoopCoderForCausalLM(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/isaac.py b/vllm/model_executor/models/isaac.py index 382982805..704ade320 100644 --- a/vllm/model_executor/models/isaac.py +++ b/vllm/model_executor/models/isaac.py @@ -1450,7 +1450,7 @@ class IsaacForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/jais.py b/vllm/model_executor/models/jais.py index 601228881..2173b7e4a 100644 --- a/vllm/model_executor/models/jais.py +++ b/vllm/model_executor/models/jais.py @@ -280,7 +280,7 @@ class JAISModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, position_ids: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -344,7 +344,7 @@ class JAISLMHeadModel(nn.Module, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/jais2.py b/vllm/model_executor/models/jais2.py index aacc4abd4..265a57db5 100644 --- a/vllm/model_executor/models/jais2.py +++ b/vllm/model_executor/models/jais2.py @@ -483,7 +483,7 @@ class Jais2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py index eeca3cf78..27f14374c 100644 --- a/vllm/model_executor/models/jamba.py +++ b/vllm/model_executor/models/jamba.py @@ -348,7 +348,7 @@ class JambaModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -516,7 +516,7 @@ class JambaForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/jina_vl.py b/vllm/model_executor/models/jina_vl.py index c03fa211a..6970f74a2 100644 --- a/vllm/model_executor/models/jina_vl.py +++ b/vllm/model_executor/models/jina_vl.py @@ -125,7 +125,7 @@ class JinaVLForSequenceClassification( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/kanana_v.py b/vllm/model_executor/models/kanana_v.py index 1676520fc..06ea26155 100644 --- a/vllm/model_executor/models/kanana_v.py +++ b/vllm/model_executor/models/kanana_v.py @@ -732,7 +732,7 @@ class KananaVForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP) def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/keye.py b/vllm/model_executor/models/keye.py index f4c22fa92..b32f2762e 100644 --- a/vllm/model_executor/models/keye.py +++ b/vllm/model_executor/models/keye.py @@ -1438,7 +1438,7 @@ class BaseKeyeModule(nn.Module, SupportsMultiModal): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/kimi_linear.py b/vllm/model_executor/models/kimi_linear.py index f3ec5b759..1793397e1 100644 --- a/vllm/model_executor/models/kimi_linear.py +++ b/vllm/model_executor/models/kimi_linear.py @@ -506,7 +506,7 @@ class KimiLinearForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/kimi_vl.py b/vllm/model_executor/models/kimi_vl.py index 11cb2336d..1624089c8 100644 --- a/vllm/model_executor/models/kimi_vl.py +++ b/vllm/model_executor/models/kimi_vl.py @@ -389,7 +389,7 @@ class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/lfm2.py b/vllm/model_executor/models/lfm2.py index 629a72f39..ba5e80ac7 100644 --- a/vllm/model_executor/models/lfm2.py +++ b/vllm/model_executor/models/lfm2.py @@ -342,7 +342,7 @@ class Lfm2Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -503,7 +503,7 @@ class Lfm2ForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/lfm2_moe.py b/vllm/model_executor/models/lfm2_moe.py index 4704967b5..6d786276a 100644 --- a/vllm/model_executor/models/lfm2_moe.py +++ b/vllm/model_executor/models/lfm2_moe.py @@ -457,7 +457,7 @@ class Lfm2MoeModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -730,7 +730,7 @@ class Lfm2MoeForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/lfm2_vl.py b/vllm/model_executor/models/lfm2_vl.py index 8febeb4c0..c0fa8404f 100644 --- a/vllm/model_executor/models/lfm2_vl.py +++ b/vllm/model_executor/models/lfm2_vl.py @@ -769,7 +769,7 @@ class Lfm2VLForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index 31858a365..29cbea187 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -581,7 +581,7 @@ class LlamaForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py index 37cf301a2..6afe64776 100644 --- a/vllm/model_executor/models/llava.py +++ b/vllm/model_executor/models/llava.py @@ -662,7 +662,7 @@ class LlavaForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/llava_next.py b/vllm/model_executor/models/llava_next.py index a6cc21d8c..9f83c7910 100644 --- a/vllm/model_executor/models/llava_next.py +++ b/vllm/model_executor/models/llava_next.py @@ -509,7 +509,7 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsP def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/llava_next_video.py b/vllm/model_executor/models/llava_next_video.py index 837dc2ee6..1aee7f9c5 100644 --- a/vllm/model_executor/models/llava_next_video.py +++ b/vllm/model_executor/models/llava_next_video.py @@ -426,7 +426,7 @@ class LlavaNextVideoForConditionalGeneration(nn.Module, SupportsMultiModal, Supp def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/llava_onevision.py b/vllm/model_executor/models/llava_onevision.py index 3b91ebe29..d49c08eb3 100644 --- a/vllm/model_executor/models/llava_onevision.py +++ b/vllm/model_executor/models/llava_onevision.py @@ -887,7 +887,7 @@ class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal, Supp def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/longcat_flash.py b/vllm/model_executor/models/longcat_flash.py index fed3a1cae..f8b426df0 100644 --- a/vllm/model_executor/models/longcat_flash.py +++ b/vllm/model_executor/models/longcat_flash.py @@ -520,7 +520,7 @@ class FlashModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -605,7 +605,7 @@ class LongcatFlashForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/longcat_flash_mtp.py b/vllm/model_executor/models/longcat_flash_mtp.py index f96d3cf28..13921d735 100644 --- a/vllm/model_executor/models/longcat_flash_mtp.py +++ b/vllm/model_executor/models/longcat_flash_mtp.py @@ -150,7 +150,7 @@ class LongCatFlashMTP(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, hidden_states: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, diff --git a/vllm/model_executor/models/mamba.py b/vllm/model_executor/models/mamba.py index 85212feca..ec2a7255e 100644 --- a/vllm/model_executor/models/mamba.py +++ b/vllm/model_executor/models/mamba.py @@ -142,7 +142,7 @@ class MambaModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -225,7 +225,7 @@ class MambaForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/mamba2.py b/vllm/model_executor/models/mamba2.py index ed363df21..f1c34abf2 100644 --- a/vllm/model_executor/models/mamba2.py +++ b/vllm/model_executor/models/mamba2.py @@ -137,7 +137,7 @@ class Mamba2Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -267,7 +267,7 @@ class Mamba2ForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/midashenglm.py b/vllm/model_executor/models/midashenglm.py index 8d89af52c..b2c23c29d 100644 --- a/vllm/model_executor/models/midashenglm.py +++ b/vllm/model_executor/models/midashenglm.py @@ -796,7 +796,7 @@ class MiDashengLMModel(nn.Module, SupportsMultiModal, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/mimo.py b/vllm/model_executor/models/mimo.py index cd0a6190e..a7699f0d5 100644 --- a/vllm/model_executor/models/mimo.py +++ b/vllm/model_executor/models/mimo.py @@ -61,7 +61,7 @@ logger = init_logger(__name__) class MiMoModel(Qwen2Model): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/mimo_mtp.py b/vllm/model_executor/models/mimo_mtp.py index 9905f65b7..3558ddf39 100644 --- a/vllm/model_executor/models/mimo_mtp.py +++ b/vllm/model_executor/models/mimo_mtp.py @@ -169,7 +169,7 @@ class MiMoMTP(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, hidden_states: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, diff --git a/vllm/model_executor/models/mimo_v2_flash.py b/vllm/model_executor/models/mimo_v2_flash.py index db85073b3..9d8c26e46 100644 --- a/vllm/model_executor/models/mimo_v2_flash.py +++ b/vllm/model_executor/models/mimo_v2_flash.py @@ -478,7 +478,7 @@ class MiMoV2Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -693,7 +693,7 @@ class MiMoV2FlashForCausalLM(nn.Module, SupportsPP, MixtureOfExperts): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/minicpm.py b/vllm/model_executor/models/minicpm.py index 8f7b8b21b..023d08691 100644 --- a/vllm/model_executor/models/minicpm.py +++ b/vllm/model_executor/models/minicpm.py @@ -440,7 +440,7 @@ class MiniCPMModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -620,7 +620,7 @@ class MiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsEagle3): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/minicpmv.py b/vllm/model_executor/models/minicpmv.py index d9179250b..50a420759 100644 --- a/vllm/model_executor/models/minicpmv.py +++ b/vllm/model_executor/models/minicpmv.py @@ -1147,7 +1147,7 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/minimax_m2.py b/vllm/model_executor/models/minimax_m2.py index 95b036ac2..be5f0b921 100644 --- a/vllm/model_executor/models/minimax_m2.py +++ b/vllm/model_executor/models/minimax_m2.py @@ -361,7 +361,7 @@ class MiniMaxM2Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -520,7 +520,7 @@ class MiniMaxM2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/minimax_text_01.py b/vllm/model_executor/models/minimax_text_01.py index 44417c98b..b91321aed 100644 --- a/vllm/model_executor/models/minimax_text_01.py +++ b/vllm/model_executor/models/minimax_text_01.py @@ -712,7 +712,7 @@ class MiniMaxText01ForCausalLM(nn.Module, HasInnerState, IsHybrid): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/minimax_vl_01.py b/vllm/model_executor/models/minimax_vl_01.py index b4bc1388a..d2bdeb0bc 100644 --- a/vllm/model_executor/models/minimax_vl_01.py +++ b/vllm/model_executor/models/minimax_vl_01.py @@ -359,7 +359,7 @@ class MiniMaxVL01ForConditionalGeneration(nn.Module, SupportsMultiModal, Support def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/mistral3.py b/vllm/model_executor/models/mistral3.py index 9b49f7b47..1162217b6 100644 --- a/vllm/model_executor/models/mistral3.py +++ b/vllm/model_executor/models/mistral3.py @@ -539,7 +539,7 @@ class Mistral3ForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index 89dab5f3c..8d1197800 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -338,7 +338,7 @@ class MixtralModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -574,7 +574,7 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP, MixtureOfExperts): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/mllama4.py b/vllm/model_executor/models/mllama4.py index 7db496758..be34e436a 100644 --- a/vllm/model_executor/models/mllama4.py +++ b/vllm/model_executor/models/mllama4.py @@ -901,7 +901,7 @@ class Llama4ForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/modernbert.py b/vllm/model_executor/models/modernbert.py index f3fec2bfb..a8119b046 100644 --- a/vllm/model_executor/models/modernbert.py +++ b/vllm/model_executor/models/modernbert.py @@ -54,12 +54,11 @@ class ModernBertEmbeddings(nn.Module): input_ids: torch.Tensor, inputs_embeds: torch.Tensor | None = None, ) -> torch.Tensor: - if inputs_embeds is not None: - return self.norm(inputs_embeds) - else: + if inputs_embeds is None: inputs_embeds = self.tok_embeddings(input_ids) - embeddings = self.norm(inputs_embeds) - return embeddings + + embeddings = self.norm(inputs_embeds) + return embeddings class ModernBertAttention(nn.Module): diff --git a/vllm/model_executor/models/molmo.py b/vllm/model_executor/models/molmo.py index 6864279ed..7ba1382e3 100644 --- a/vllm/model_executor/models/molmo.py +++ b/vllm/model_executor/models/molmo.py @@ -871,7 +871,7 @@ class MolmoModel(nn.Module, SupportsQuant): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/molmo2.py b/vllm/model_executor/models/molmo2.py index b725d023f..b0a75c18b 100644 --- a/vllm/model_executor/models/molmo2.py +++ b/vllm/model_executor/models/molmo2.py @@ -1217,7 +1217,7 @@ class Molmo2TextModel(nn.Module, SupportsQuant): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/mpt.py b/vllm/model_executor/models/mpt.py index 1e285646b..5d039f7b4 100644 --- a/vllm/model_executor/models/mpt.py +++ b/vllm/model_executor/models/mpt.py @@ -253,7 +253,7 @@ class MPTModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, position_ids: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -313,7 +313,7 @@ class MPTForCausalLM(nn.Module, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/nano_nemotron_vl.py b/vllm/model_executor/models/nano_nemotron_vl.py index c38c5354d..21e2715fe 100644 --- a/vllm/model_executor/models/nano_nemotron_vl.py +++ b/vllm/model_executor/models/nano_nemotron_vl.py @@ -1917,7 +1917,7 @@ class NemotronH_Nano_VL_V2( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/nemotron.py b/vllm/model_executor/models/nemotron.py index 21605015c..c416cbb15 100644 --- a/vllm/model_executor/models/nemotron.py +++ b/vllm/model_executor/models/nemotron.py @@ -477,7 +477,7 @@ class NemotronForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/nemotron_h.py b/vllm/model_executor/models/nemotron_h.py index 4318c53aa..999949fa1 100644 --- a/vllm/model_executor/models/nemotron_h.py +++ b/vllm/model_executor/models/nemotron_h.py @@ -601,7 +601,7 @@ class NemotronHModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -887,7 +887,7 @@ class NemotronHForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/nemotron_nas.py b/vllm/model_executor/models/nemotron_nas.py index 6ff4f0e84..080af5e96 100644 --- a/vllm/model_executor/models/nemotron_nas.py +++ b/vllm/model_executor/models/nemotron_nas.py @@ -449,7 +449,7 @@ class DeciLMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, HasNoOps): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/nemotron_parse.py b/vllm/model_executor/models/nemotron_parse.py index 8f66bb897..b14bf09ab 100644 --- a/vllm/model_executor/models/nemotron_parse.py +++ b/vllm/model_executor/models/nemotron_parse.py @@ -290,7 +290,7 @@ class MBartDecoderNoPos(nn.Module): def forward( self, - decoder_input_ids: torch.Tensor, + decoder_input_ids: torch.Tensor | None, *, encoder_hidden_states: torch.Tensor | None, inputs_embeds: torch.Tensor | None = None, @@ -894,7 +894,7 @@ class NemotronParseForConditionalGeneration(nn.Module, SupportsMultiModal): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, encoder_outputs: list[torch.Tensor] | None = None, **kwargs, diff --git a/vllm/model_executor/models/nemotron_vl.py b/vllm/model_executor/models/nemotron_vl.py index 620cee109..7b87b6160 100644 --- a/vllm/model_executor/models/nemotron_vl.py +++ b/vllm/model_executor/models/nemotron_vl.py @@ -597,7 +597,7 @@ class LlamaNemotronVLChatModel(nn.Module, SupportsMultiModal, SupportsPP, Suppor def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/olmo.py b/vllm/model_executor/models/olmo.py index dd7c27f10..3ed316dbe 100644 --- a/vllm/model_executor/models/olmo.py +++ b/vllm/model_executor/models/olmo.py @@ -271,7 +271,7 @@ class OlmoModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -382,7 +382,7 @@ class OlmoForCausalLM(nn.Module, SupportsPP, SupportsLoRA): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/olmo2.py b/vllm/model_executor/models/olmo2.py index b030c94b5..5019cd787 100644 --- a/vllm/model_executor/models/olmo2.py +++ b/vllm/model_executor/models/olmo2.py @@ -309,7 +309,7 @@ class Olmo2Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -424,7 +424,7 @@ class Olmo2ForCausalLM(nn.Module, SupportsPP, SupportsLoRA): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/olmoe.py b/vllm/model_executor/models/olmoe.py index b4cf98de1..d9695f8f2 100644 --- a/vllm/model_executor/models/olmoe.py +++ b/vllm/model_executor/models/olmoe.py @@ -300,7 +300,7 @@ class OlmoeModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -476,7 +476,7 @@ class OlmoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/openpangu.py b/vllm/model_executor/models/openpangu.py index 9f569bcc7..982c4e1fc 100644 --- a/vllm/model_executor/models/openpangu.py +++ b/vllm/model_executor/models/openpangu.py @@ -1056,7 +1056,7 @@ class OpenPanguModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -1286,7 +1286,7 @@ class OpenPanguModelBase(nn.Module, SupportsPP, SupportsLoRA): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/openpangu_mtp.py b/vllm/model_executor/models/openpangu_mtp.py index 273351051..91b454a4b 100644 --- a/vllm/model_executor/models/openpangu_mtp.py +++ b/vllm/model_executor/models/openpangu_mtp.py @@ -104,7 +104,7 @@ class OpenPanguMTP(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, hidden_states: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, diff --git a/vllm/model_executor/models/opt.py b/vllm/model_executor/models/opt.py index bba5291ea..4c64b5771 100644 --- a/vllm/model_executor/models/opt.py +++ b/vllm/model_executor/models/opt.py @@ -267,7 +267,7 @@ class OPTDecoder(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -316,7 +316,7 @@ class OPTModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -399,7 +399,7 @@ class OPTForCausalLM(nn.Module, SupportsPP, SupportsLoRA): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/orion.py b/vllm/model_executor/models/orion.py index 9d9066c4b..7d5a36a97 100644 --- a/vllm/model_executor/models/orion.py +++ b/vllm/model_executor/models/orion.py @@ -253,7 +253,7 @@ class OrionModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -343,7 +343,7 @@ class OrionForCausalLM(nn.Module, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/ouro.py b/vllm/model_executor/models/ouro.py index f51c0f095..a9476645a 100644 --- a/vllm/model_executor/models/ouro.py +++ b/vllm/model_executor/models/ouro.py @@ -357,7 +357,7 @@ class OuroModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -482,7 +482,7 @@ class OuroForCausalLM(nn.Module, SupportsLoRA): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/ovis.py b/vllm/model_executor/models/ovis.py index af3c6669d..3a058bb94 100644 --- a/vllm/model_executor/models/ovis.py +++ b/vllm/model_executor/models/ovis.py @@ -525,7 +525,7 @@ class Ovis(nn.Module, SupportsMultiModal, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/ovis2_5.py b/vllm/model_executor/models/ovis2_5.py index b2ba9b196..a787a0bf8 100644 --- a/vllm/model_executor/models/ovis2_5.py +++ b/vllm/model_executor/models/ovis2_5.py @@ -632,7 +632,7 @@ class Ovis2_5(nn.Module, SupportsMultiModal, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/paddleocr_vl.py b/vllm/model_executor/models/paddleocr_vl.py index 5028468e3..d8e66dc10 100644 --- a/vllm/model_executor/models/paddleocr_vl.py +++ b/vllm/model_executor/models/paddleocr_vl.py @@ -1159,7 +1159,7 @@ class PaddleOCRVLForConditionalGeneration(nn.Module, SupportsMultiModal, Support def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/paligemma.py b/vllm/model_executor/models/paligemma.py index 533f060ae..9d67522e2 100644 --- a/vllm/model_executor/models/paligemma.py +++ b/vllm/model_executor/models/paligemma.py @@ -389,7 +389,7 @@ class PaliGemmaForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/persimmon.py b/vllm/model_executor/models/persimmon.py index b644603c5..c7829476e 100644 --- a/vllm/model_executor/models/persimmon.py +++ b/vllm/model_executor/models/persimmon.py @@ -271,7 +271,7 @@ class PersimmonModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -348,7 +348,7 @@ class PersimmonForCausalLM(nn.Module, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/phi.py b/vllm/model_executor/models/phi.py index e01e9d47c..bf1e13614 100644 --- a/vllm/model_executor/models/phi.py +++ b/vllm/model_executor/models/phi.py @@ -234,7 +234,7 @@ class PhiModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -340,7 +340,7 @@ class PhiForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/phi3v.py b/vllm/model_executor/models/phi3v.py index fc0f46dae..3dde6dfd7 100644 --- a/vllm/model_executor/models/phi3v.py +++ b/vllm/model_executor/models/phi3v.py @@ -686,7 +686,7 @@ class Phi3VForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, SupportsQuant) def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/phi4mm.py b/vllm/model_executor/models/phi4mm.py index 06d551a4c..1d66fe2f1 100644 --- a/vllm/model_executor/models/phi4mm.py +++ b/vllm/model_executor/models/phi4mm.py @@ -1211,7 +1211,7 @@ class Phi4MMForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/phimoe.py b/vllm/model_executor/models/phimoe.py index 835f360df..19c7cecda 100644 --- a/vllm/model_executor/models/phimoe.py +++ b/vllm/model_executor/models/phimoe.py @@ -483,7 +483,7 @@ class PhiMoEModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -649,7 +649,7 @@ class PhiMoEForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/pixtral.py b/vllm/model_executor/models/pixtral.py index c8eef850c..45e5010e2 100644 --- a/vllm/model_executor/models/pixtral.py +++ b/vllm/model_executor/models/pixtral.py @@ -479,7 +479,7 @@ class PixtralForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/plamo2.py b/vllm/model_executor/models/plamo2.py index 24df17963..82833dddc 100644 --- a/vllm/model_executor/models/plamo2.py +++ b/vllm/model_executor/models/plamo2.py @@ -775,7 +775,7 @@ class Plamo2Model(torch.nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -852,7 +852,7 @@ class Plamo2ForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/plamo3.py b/vllm/model_executor/models/plamo3.py index 3550c9fa7..df1300ac1 100644 --- a/vllm/model_executor/models/plamo3.py +++ b/vllm/model_executor/models/plamo3.py @@ -342,7 +342,7 @@ class Plamo3Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -412,7 +412,7 @@ class Plamo3ForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen.py b/vllm/model_executor/models/qwen.py index 50b53a1ff..906395260 100644 --- a/vllm/model_executor/models/qwen.py +++ b/vllm/model_executor/models/qwen.py @@ -232,7 +232,7 @@ class QWenModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -366,7 +366,7 @@ class QWenLMHeadModel(QWenBaseModel, SupportsPP, SupportsLoRA): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py index ab9eac1a9..99bddeec2 100644 --- a/vllm/model_executor/models/qwen2.py +++ b/vllm/model_executor/models/qwen2.py @@ -417,7 +417,7 @@ class Qwen2Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -575,7 +575,7 @@ class Qwen2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsEagle3): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen2_5_omni_thinker.py b/vllm/model_executor/models/qwen2_5_omni_thinker.py index 76afd7749..e4a9c0c10 100644 --- a/vllm/model_executor/models/qwen2_5_omni_thinker.py +++ b/vllm/model_executor/models/qwen2_5_omni_thinker.py @@ -1298,7 +1298,7 @@ class Qwen2_5OmniThinkerForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py index 0e99b050a..c0fb1f13b 100644 --- a/vllm/model_executor/models/qwen2_5_vl.py +++ b/vllm/model_executor/models/qwen2_5_vl.py @@ -1448,7 +1448,7 @@ class Qwen2_5_VLForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen2_audio.py b/vllm/model_executor/models/qwen2_audio.py index a9e9de648..ab72a4482 100644 --- a/vllm/model_executor/models/qwen2_audio.py +++ b/vllm/model_executor/models/qwen2_audio.py @@ -451,7 +451,7 @@ class Qwen2AudioForConditionalGeneration(nn.Module, SupportsMultiModal, Supports def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen2_moe.py b/vllm/model_executor/models/qwen2_moe.py index fbfd681d5..04330a868 100644 --- a/vllm/model_executor/models/qwen2_moe.py +++ b/vllm/model_executor/models/qwen2_moe.py @@ -395,7 +395,7 @@ class Qwen2MoeModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -579,7 +579,7 @@ class Qwen2MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen2_rm.py b/vllm/model_executor/models/qwen2_rm.py index b0fa576f5..cdf1a327e 100644 --- a/vllm/model_executor/models/qwen2_rm.py +++ b/vllm/model_executor/models/qwen2_rm.py @@ -79,7 +79,7 @@ class Qwen2RewardBaseModel(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index 0af303f0a..847501c01 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -1368,7 +1368,7 @@ class Qwen2VLForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen3.py b/vllm/model_executor/models/qwen3.py index 707e0ccfd..5757739c1 100644 --- a/vllm/model_executor/models/qwen3.py +++ b/vllm/model_executor/models/qwen3.py @@ -306,7 +306,7 @@ class Qwen3ForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsEagle3): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py index 567c03193..690d5368a 100644 --- a/vllm/model_executor/models/qwen3_moe.py +++ b/vllm/model_executor/models/qwen3_moe.py @@ -464,7 +464,7 @@ class Qwen3MoeModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -765,7 +765,7 @@ class Qwen3MoeForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index bc70d2b54..3e89d1972 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -1004,7 +1004,7 @@ class Qwen3NextModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -1239,7 +1239,7 @@ class Qwen3NextForCausalLM( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen3_next_mtp.py b/vllm/model_executor/models/qwen3_next_mtp.py index 854d7f9a7..e76664bed 100644 --- a/vllm/model_executor/models/qwen3_next_mtp.py +++ b/vllm/model_executor/models/qwen3_next_mtp.py @@ -261,7 +261,7 @@ class Qwen3NextMTP(nn.Module, QwenNextMixtureOfExperts): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, hidden_states: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, diff --git a/vllm/model_executor/models/qwen3_omni_moe_thinker.py b/vllm/model_executor/models/qwen3_omni_moe_thinker.py index cf35d9ae3..d9a0b9923 100755 --- a/vllm/model_executor/models/qwen3_omni_moe_thinker.py +++ b/vllm/model_executor/models/qwen3_omni_moe_thinker.py @@ -998,7 +998,7 @@ class Qwen3MoeLLMModel(Qwen3MoeModel): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -1819,7 +1819,7 @@ class Qwen3OmniMoeThinkerForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index 708198d37..bbab47044 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -1119,7 +1119,7 @@ class Qwen3LLMModel(Qwen3Model): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -2001,7 +2001,7 @@ class Qwen3VLForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen3_vl_moe.py b/vllm/model_executor/models/qwen3_vl_moe.py index efd965ad4..b39a3d297 100644 --- a/vllm/model_executor/models/qwen3_vl_moe.py +++ b/vllm/model_executor/models/qwen3_vl_moe.py @@ -94,7 +94,7 @@ class Qwen3MoeLLMModel(Qwen3MoeModel): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/qwen_vl.py b/vllm/model_executor/models/qwen_vl.py index 11856dd2d..ed61bb140 100644 --- a/vllm/model_executor/models/qwen_vl.py +++ b/vllm/model_executor/models/qwen_vl.py @@ -810,7 +810,7 @@ class QwenVLForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/seed_oss.py b/vllm/model_executor/models/seed_oss.py index 91a60bfd1..e38cf9277 100644 --- a/vllm/model_executor/models/seed_oss.py +++ b/vllm/model_executor/models/seed_oss.py @@ -334,7 +334,7 @@ class SeedOssModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -467,7 +467,7 @@ class SeedOssForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/skyworkr1v.py b/vllm/model_executor/models/skyworkr1v.py index f9ed02bd1..29a0389b9 100644 --- a/vllm/model_executor/models/skyworkr1v.py +++ b/vllm/model_executor/models/skyworkr1v.py @@ -898,7 +898,7 @@ class SkyworkR1VChatModel(nn.Module, SupportsMultiModal, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/solar.py b/vllm/model_executor/models/solar.py index 964aa9027..589727c6f 100644 --- a/vllm/model_executor/models/solar.py +++ b/vllm/model_executor/models/solar.py @@ -465,7 +465,7 @@ class SolarForCausalLM(nn.Module, SupportsLoRA, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/stablelm.py b/vllm/model_executor/models/stablelm.py index ea4342882..211b57ddb 100644 --- a/vllm/model_executor/models/stablelm.py +++ b/vllm/model_executor/models/stablelm.py @@ -246,7 +246,7 @@ class StableLMEpochModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -332,7 +332,7 @@ class StablelmForCausalLM(nn.Module, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/starcoder2.py b/vllm/model_executor/models/starcoder2.py index 569ca9b08..dd1e8e98f 100644 --- a/vllm/model_executor/models/starcoder2.py +++ b/vllm/model_executor/models/starcoder2.py @@ -252,7 +252,7 @@ class Starcoder2Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None, inputs_embeds: torch.Tensor | None = None, @@ -336,7 +336,7 @@ class Starcoder2ForCausalLM(nn.Module, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/step3_text.py b/vllm/model_executor/models/step3_text.py index 7077f1a22..4855dffec 100644 --- a/vllm/model_executor/models/step3_text.py +++ b/vllm/model_executor/models/step3_text.py @@ -354,7 +354,7 @@ class Step3TextModel(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, @@ -419,7 +419,7 @@ class Step3TextForCausalLM(nn.Module, SupportsPP): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/step3_vl.py b/vllm/model_executor/models/step3_vl.py index 1ab1c4ad6..8f41e8c5b 100644 --- a/vllm/model_executor/models/step3_vl.py +++ b/vllm/model_executor/models/step3_vl.py @@ -1101,7 +1101,7 @@ class Step3VLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP) def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/tarsier.py b/vllm/model_executor/models/tarsier.py index cc7ccc65b..5945b7c72 100644 --- a/vllm/model_executor/models/tarsier.py +++ b/vllm/model_executor/models/tarsier.py @@ -585,7 +585,7 @@ class TarsierForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP) def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/ultravox.py b/vllm/model_executor/models/ultravox.py index 899611f13..e8962c8bb 100644 --- a/vllm/model_executor/models/ultravox.py +++ b/vllm/model_executor/models/ultravox.py @@ -714,7 +714,7 @@ class UltravoxModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/voxtral.py b/vllm/model_executor/models/voxtral.py index c3d68967a..aa0e616a9 100644 --- a/vllm/model_executor/models/voxtral.py +++ b/vllm/model_executor/models/voxtral.py @@ -397,7 +397,7 @@ class VoxtralForConditionalGeneration( def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/voxtral_streaming.py b/vllm/model_executor/models/voxtral_streaming.py index fb20d986a..3d1bb1933 100644 --- a/vllm/model_executor/models/voxtral_streaming.py +++ b/vllm/model_executor/models/voxtral_streaming.py @@ -173,7 +173,7 @@ class VoxtralStreamingGeneration(VoxtralForConditionalGeneration): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, intermediate_tensors: IntermediateTensors | None = None, inputs_embeds: torch.Tensor | None = None, diff --git a/vllm/model_executor/models/zamba2.py b/vllm/model_executor/models/zamba2.py index 59a8520f7..dafad457a 100644 --- a/vllm/model_executor/models/zamba2.py +++ b/vllm/model_executor/models/zamba2.py @@ -771,7 +771,7 @@ class Zamba2Model(nn.Module): def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, inputs_embeds: torch.Tensor | None = None, ) -> torch.Tensor | IntermediateTensors: @@ -947,7 +947,7 @@ class Zamba2ForCausalLM(nn.Module, HasInnerState, IsHybrid, SupportsMambaPrefixC def forward( self, - input_ids: torch.Tensor, + input_ids: torch.Tensor | None, positions: torch.Tensor, inputs_embeds: torch.Tensor | None = None, **kwargs: Any,