diff --git a/vllm/model_executor/models/transformers/multimodal.py b/vllm/model_executor/models/transformers/multimodal.py
index 4912ae677..9ad271427 100644
--- a/vllm/model_executor/models/transformers/multimodal.py
+++ b/vllm/model_executor/models/transformers/multimodal.py
@@ -448,20 +448,21 @@ class MultiModalMixin(SupportsMultiModal, SupportsMRoPE):
         # In v4 `get_rope_index` doesn't have wildcard `kwargs`, and
         # can't accept arbitrary args, even if its value is `None`
         kwargs = {}
-        if mm_token_type_ids:
-            if not hasattr(self, "_get_rope_index_accepts_mm_token_type_ids"):
-                import inspect
+        if not hasattr(self, "_get_rope_index_accepts_mm_token_type_ids"):
+            import inspect
 
-                sig = inspect.signature(self.model.get_rope_index)
-                params = sig.parameters
-                self._get_rope_index_accepts_mm_token_type_ids = (
-                    "mm_token_type_ids" in params
-                    or any(
-                        p.kind == inspect.Parameter.VAR_KEYWORD for p in params.values()
-                    )
-                )
-            if self._get_rope_index_accepts_mm_token_type_ids:
+            sig = inspect.signature(self.model.get_rope_index)
+            params = sig.parameters
+            self._get_rope_index_accepts_mm_token_type_ids = (
+                "mm_token_type_ids" in params
+                or any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params.values())
+            )
+        if self._get_rope_index_accepts_mm_token_type_ids:
+            if mm_token_type_ids:
                 kwargs["mm_token_type_ids"] = torch.cat(mm_token_type_ids)
+            else:
+                shape = (1, len(input_tokens))
+                kwargs["mm_token_type_ids"] = torch.zeros(*shape, dtype=torch.int)
 
         mrope_positions, mrope_position_delta = self.model.get_rope_index(
             input_ids=torch.tensor(input_tokens).unsqueeze(0),