Support encoder-only models without KV-Cache (#21270)

Signed-off-by: Max de Bayser <maxdebayser@gmail.com>
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Co-authored-by: Russell Bryant <rbryant@redhat.com>
This commit is contained in:
Maximilien de Bayser
2025-07-26 10:09:52 -03:00
committed by GitHub
parent f27fdfc3ed
commit 1cd6eaba54
17 changed files with 352 additions and 99 deletions

View File

@@ -111,6 +111,12 @@ class EngineCore:
"compatibility may not be maintained.",
vllm_config.scheduler_config.scheduler_cls)
if len(kv_cache_config.kv_cache_groups) == 0:
# Encoder models without a KV cache don't support
# chunked prefill. Open question: do SSM models support it?
logger.info("Disabling chunked prefill for model without KVCache")
vllm_config.scheduler_config.chunked_prefill_enabled = False
self.scheduler: SchedulerInterface = Scheduler(
vllm_config=vllm_config,
kv_cache_config=kv_cache_config,