[Bugfix] Fix validate model input for decoder models (#27099)

Signed-off-by: Yannick Schnider <yannick.schnider1@ibm.com>
Signed-off-by: Yannick Schnider <Yannick.Schnider1@ibm.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
Yannick Schnider
2025-11-13 19:18:47 +01:00
committed by GitHub
parent fe1cd7704d
commit 119c4927b3
2 changed files with 78 additions and 0 deletions


@@ -575,6 +575,21 @@ class Processor:
        # check that chunked prefill does not truncate them
        # max_batch_len = self.scheduler_config.max_num_batched_tokens
        if (
            prompt_len == max_prompt_len
            and prompt_type == "decoder"
            and not model_config.is_multimodal_model
        ):
            suggestion = (
                "Make sure that `max_model_len` is no smaller than the "
                "number of text tokens (prompt + requested output tokens)."
            )
            raise ValueError(
                f"The {prompt_type} prompt (length {prompt_len}) plus the number of "
                f"requested output tokens (at least 1) is longer than the maximum "
                f"model length of {max_prompt_len}. {suggestion}"
            )

    def stat_mm_cache(self) -> MultiModalCacheStats | None:
        return self.input_preprocessor.stat_mm_cache()
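
A minimal standalone sketch of the check this commit adds, outside of vLLM: the function name and parameters below (check_decoder_prompt_fits, is_multimodal) are illustrative stand-ins, not the actual Processor API; only the condition and the error message mirror the diff above.

# Sketch: reject a decoder prompt that already fills max_model_len,
# since that leaves no room for even a single output token.
def check_decoder_prompt_fits(
    prompt_len: int,
    max_prompt_len: int,
    prompt_type: str,
    is_multimodal: bool,
) -> None:
    if (
        prompt_len == max_prompt_len
        and prompt_type == "decoder"
        and not is_multimodal
    ):
        raise ValueError(
            f"The {prompt_type} prompt (length {prompt_len}) plus the number "
            f"of requested output tokens (at least 1) is longer than the "
            f"maximum model length of {max_prompt_len}. Make sure that "
            "`max_model_len` is no smaller than the number of text tokens "
            "(prompt + requested output tokens)."
        )

# Example: with max_model_len == 4096, a 4095-token decoder prompt passes,
# while a 4096-token one is rejected before scheduling.
check_decoder_prompt_fits(4095, 4096, "decoder", False)    # ok
# check_decoder_prompt_fits(4096, 4096, "decoder", False)  # raises ValueError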