Enforce valid max_num_batched_tokens when disable_chunked_mm_input=True (#16447)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-04-11 02:09:52 -06:00
committed by GitHub
parent f7030df3be
commit aa3b3d76e0
3 changed files with 18 additions and 1 deletion

View File

@@ -1030,7 +1030,7 @@ class EngineArgs:
action=StoreBoolean,
default=EngineArgs.disable_chunked_mm_input,
nargs="?",
-const="False",
+const="True",
help="Disable multimodal input chunking attention for V1. "
"If set to true and chunked prefill is enabled, we do not want to"
" partially schedule a multimodal item. This ensures that if a "