[Bugfix] Fix validate model input for decoder models (#27099)
Signed-off-by: Yannick Schnider <yannick.schnider1@ibm.com>
Signed-off-by: Yannick Schnider <Yannick.Schnider1@ibm.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Roger Wang <hey@rogerw.io>
@@ -575,6 +575,21 @@ class Processor:
        # check that chunked prefill does not truncate them
        # max_batch_len = self.scheduler_config.max_num_batched_tokens

        if (
            prompt_len == max_prompt_len
            and prompt_type == "decoder"
            and not model_config.is_multimodal_model
        ):
            suggestion = (
                "Make sure that `max_model_len` is no smaller than the "
                "number of text tokens (prompt + requested output tokens)."
            )
            raise ValueError(
                f"The {prompt_type} prompt (length {prompt_len}) plus the number of "
                f"requested output tokens (at least 1) is longer than the maximum "
                f"model length of {max_prompt_len}. {suggestion}"
            )

    def stat_mm_cache(self) -> MultiModalCacheStats | None:
        return self.input_preprocessor.stat_mm_cache()
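For context, here is a minimal standalone sketch (not the vLLM code itself) of why the equality case matters: a text-only decoder request must reserve room for at least one generated token, so a prompt that already fills the maximum model length can never be served and should be rejected up front. The names `prompt_len`, `max_prompt_len`, `prompt_type`, and the multimodal flag mirror the diff above; the helper function itself is purely illustrative.

    # Illustrative sketch only; mirrors the validation added in the diff above.
    # `max_prompt_len` stands in for the model's max_model_len. As in the patch,
    # the check applies only to text-only decoder prompts.
    def validate_decoder_prompt(
        prompt_len: int,
        max_prompt_len: int,
        prompt_type: str = "decoder",
        is_multimodal_model: bool = False,
    ) -> None:
        """Reject text-only decoder prompts that leave no room for output tokens."""
        if (
            prompt_len == max_prompt_len
            and prompt_type == "decoder"
            and not is_multimodal_model
        ):
            raise ValueError(
                f"The {prompt_type} prompt (length {prompt_len}) plus the number of "
                f"requested output tokens (at least 1) is longer than the maximum "
                f"model length of {max_prompt_len}. Make sure that `max_model_len` "
                f"is no smaller than the number of text tokens "
                f"(prompt + requested output tokens)."
            )

    try:
        # Example: a 4096-token prompt against a 4096-token limit is rejected
        # up front instead of failing later during scheduling.
        validate_decoder_prompt(prompt_len=4096, max_prompt_len=4096)
    except ValueError as err:
        print(err)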