[bugfix][DCP] fix block_size of hash in DCP prefix caching (#26296)
Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
@@ -142,12 +142,18 @@ class EngineCore:
|
||||
logger.info("Disabling chunked prefill for model without KVCache")
|
||||
vllm_config.scheduler_config.chunked_prefill_enabled = False
|
||||
|
||||
scheduler_block_size = (
|
||||
vllm_config.cache_config.block_size
|
||||
* vllm_config.parallel_config.decode_context_parallel_size
|
||||
)
|
||||
|
||||
self.scheduler: SchedulerInterface = Scheduler(
|
||||
vllm_config=vllm_config,
|
||||
kv_cache_config=kv_cache_config,
|
||||
structured_output_manager=self.structured_output_manager,
|
||||
include_finished_set=vllm_config.parallel_config.data_parallel_size > 1,
|
||||
log_stats=self.log_stats,
|
||||
block_size=scheduler_block_size,
|
||||
)
|
||||
self.use_spec_decode = vllm_config.speculative_config is not None
|
||||
if self.scheduler.connector is not None: # type: ignore
|
||||
@@ -177,14 +183,13 @@ class EngineCore:
|
||||
self.vllm_config.cache_config.enable_prefix_caching
|
||||
or self.scheduler.get_kv_connector() is not None
|
||||
):
|
||||
block_size = vllm_config.cache_config.block_size
|
||||
caching_hash_fn = get_hash_fn_by_name(
|
||||
vllm_config.cache_config.prefix_caching_hash_algo
|
||||
)
|
||||
init_none_hash(caching_hash_fn)
|
||||
|
||||
self.request_block_hasher = get_request_block_hasher(
|
||||
block_size, caching_hash_fn
|
||||
scheduler_block_size, caching_hash_fn
|
||||
)
|
||||
|
||||
self.step_fn = (
|
||||
|
||||
Reference in New Issue
Block a user