Improve enable chunked_prefill & prefix_caching logic. (#26623)
Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
Signed-off-by: wang.yuqi <noooop@126.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -119,11 +119,12 @@ class EngineCore:
         # Setup scheduler.
         Scheduler = vllm_config.scheduler_config.get_scheduler_cls()
 
-        if len(kv_cache_config.kv_cache_groups) == 0:
+        if len(kv_cache_config.kv_cache_groups) == 0:  # noqa: SIM102
             # Encoder models without KV cache don't support
             # chunked prefill. But do SSM models?
-            logger.info("Disabling chunked prefill for model without KVCache")
-            vllm_config.scheduler_config.enable_chunked_prefill = False
+            if vllm_config.scheduler_config.enable_chunked_prefill:
+                logger.warning("Disabling chunked prefill for model without KVCache")
+                vllm_config.scheduler_config.enable_chunked_prefill = False
 
         scheduler_block_size = (
             vllm_config.cache_config.block_size
Reference in New Issue
Block a user