[Bugfix] Fix validate model input for decoder models (#27099)
Signed-off-by: Yannick Schnider <yannick.schnider1@ibm.com>
Signed-off-by: Yannick Schnider <Yannick.Schnider1@ibm.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Roger Wang <hey@rogerw.io>
@@ -575,6 +575,21 @@ class Processor:
        # check that chunked prefill does not truncate them
        # max_batch_len = self.scheduler_config.max_num_batched_tokens

        if (
            prompt_len == max_prompt_len
            and prompt_type == "decoder"
            and not model_config.is_multimodal_model
        ):
            suggestion = (
                "Make sure that `max_model_len` is no smaller than the "
                "number of text tokens (prompt + requested output tokens)."
            )
            raise ValueError(
                f"The {prompt_type} prompt (length {prompt_len}) plus the number of "
                f"requested output tokens (at least 1) is longer than the maximum "
                f"model length of {max_prompt_len}. {suggestion}"
            )

    def stat_mm_cache(self) -> MultiModalCacheStats | None:
        return self.input_preprocessor.stat_mm_cache()
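For context, here is a minimal standalone sketch (not the vLLM code itself) of why the equality case matters: a text-only decoder request must reserve room for at least one generated token, so a prompt that already fills the maximum model length can never be served and should be rejected up front. The names `prompt_len`, `max_prompt_len`, `prompt_type`, and the multimodal flag mirror the diff above; the helper function itself is purely illustrative.

    # Illustrative sketch only; mirrors the validation added in the diff above.
    # `max_prompt_len` stands in for the model's max_model_len. As in the patch,
    # the check applies only to text-only decoder prompts.
    def validate_decoder_prompt(
        prompt_len: int,
        max_prompt_len: int,
        prompt_type: str = "decoder",
        is_multimodal_model: bool = False,
    ) -> None:
        """Reject text-only decoder prompts that leave no room for output tokens."""
        if (
            prompt_len == max_prompt_len
            and prompt_type == "decoder"
            and not is_multimodal_model
        ):
            raise ValueError(
                f"The {prompt_type} prompt (length {prompt_len}) plus the number of "
                f"requested output tokens (at least 1) is longer than the maximum "
                f"model length of {max_prompt_len}. Make sure that `max_model_len` "
                f"is no smaller than the number of text tokens "
                f"(prompt + requested output tokens)."
            )

    try:
        # Example: a 4096-token prompt against a 4096-token limit is rejected
        # up front instead of failing later during scheduling.
        validate_decoder_prompt(prompt_len=4096, max_prompt_len=4096)
    except ValueError as err:
        print(err)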