diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 61e166133..061ac8680 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -4070,7 +4070,7 @@ class GPUModelRunner( else: target_hidden_states = hidden_states[:total_num_tokens] - if self.supports_mm_inputs: + if self.supports_mm_inputs and self.drafter.supports_mm_inputs: mm_embed_inputs = self._gather_mm_embeddings( scheduler_output, shift_computed_tokens=1,