diff --git a/tests/models/multimodal/processing/test_qwen2_5_omni_embed.py b/tests/models/multimodal/processing/test_qwen2_5_omni_embed.py
index 5001b98b6..4eb4d03bf 100644
--- a/tests/models/multimodal/processing/test_qwen2_5_omni_embed.py
+++ b/tests/models/multimodal/processing/test_qwen2_5_omni_embed.py
@@ -185,14 +185,16 @@ def make_mock_model(hidden: int = 8):
 
     # super().embed_input_ids → use SupportsMultiModal.embed_input_ids
    def fake_super_embed(
-        ids, mm_embs=None, *, is_multimodal=None, handle_oov_mm_token=False
+        ids,
+        mm_embs=None,
+        *,
+        is_multimodal=None,
    ):
        return SupportsMultiModal.embed_input_ids(
            model,
            ids,
            mm_embs,
            is_multimodal=is_multimodal,
-            handle_oov_mm_token=handle_oov_mm_token,
        )
 
    # Bind embed_input_ids as the real method
diff --git a/vllm/model_executor/models/fireredasr2.py b/vllm/model_executor/models/fireredasr2.py
index 5d6c68454..0aae13997 100644
--- a/vllm/model_executor/models/fireredasr2.py
+++ b/vllm/model_executor/models/fireredasr2.py
@@ -793,7 +793,6 @@ class FireRedASR2ForConditionalGeneration(
        multimodal_embeddings: MultiModalEmbeddings | None = None,
        *,
        is_multimodal: torch.Tensor | None = None,
-        handle_oov_mm_token: bool = False,
    ) -> torch.Tensor:
        inputs_embeds = self.model.decoder.embed_input_ids(input_ids)
 
diff --git a/vllm/model_executor/models/kimi_audio.py b/vllm/model_executor/models/kimi_audio.py
index 36d22d867..651144683 100644
--- a/vllm/model_executor/models/kimi_audio.py
+++ b/vllm/model_executor/models/kimi_audio.py
@@ -514,7 +514,6 @@ class KimiAudioForConditionalGeneration(
        multimodal_embeddings: tuple[torch.Tensor, ...] | None = None,
        *,
        is_multimodal: torch.Tensor | None = None,
-        handle_oov_mm_token: bool = False,
    ) -> torch.Tensor:
        """Embed input IDs and fuse with audio embeddings.