[Bugfix] If chunked_prefill is disabled, end the scheduling early. (#28911)

Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
This commit is contained in:
wang.yuqi
2025-11-25 16:06:09 +08:00
committed by GitHub
parent 6330f9477d
commit 67fc16cd8c
3 changed files with 33 additions and 4 deletions

View File

@@ -42,6 +42,7 @@ def create_scheduler(
model: str = "facebook/opt-125m",
max_num_seqs: int = 16,
max_num_batched_tokens: int = 8192,
enable_chunked_prefill: bool = True,
enable_prefix_caching: bool = False,
long_prefill_token_threshold: int = 0,
disable_chunked_mm_input: bool = False,
@@ -76,7 +77,7 @@ def create_scheduler(
max_model_len=max_model_len,
long_prefill_token_threshold=long_prefill_token_threshold,
disable_chunked_mm_input=disable_chunked_mm_input,
enable_chunked_prefill=True,
enable_chunked_prefill=enable_chunked_prefill,
async_scheduling=async_scheduling,
)
model_config = ModelConfig(