[V1] Add disable_chunked_mm_input arg to disable partial mm input prefill (#15837)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-04-08 00:24:07 -06:00
committed by GitHub
parent 87918e40c4
commit 8e5314a468
4 changed files with 80 additions and 0 deletions

View File

@@ -1721,6 +1721,14 @@ class SchedulerConfig:
chunked_prefill_enabled: bool = field(init=False)
# If set to true and chunked prefill is enabled, we do not want to
# partially schedule a multimodal item. Only used in V1.
# This ensures that if a request has a mixed prompt
# (like text tokens TTTT followed by image tokens IIIIIIIIII) where only
# some image tokens can be scheduled (like TTTTIIIII, leaving IIIII),
# it will be scheduled as TTTT in one step and IIIIIIIIII in the next.
disable_chunked_mm_input: bool = False
# Scheduler class or path: "vllm.core.scheduler.Scheduler" (default)
# or "mod.custom_class".
scheduler_cls: Union[str, type[object]] = "vllm.core.scheduler.Scheduler"