[Model Runner V2] Fix warmup for very small kvcache and/or blocksizes (#36176)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
@@ -464,6 +464,10 @@ class Worker(WorkerBase):
|
||||
def initialize_from_config(self, kv_cache_config: KVCacheConfig) -> None:
|
||||
"""Allocate GPU KV cache with the specified kv_cache_config."""
|
||||
|
||||
# Update local config with adjusted num blocks after profiling,
|
||||
# so that it's available to the warmup stage.
|
||||
self.cache_config.num_gpu_blocks = kv_cache_config.num_blocks
|
||||
|
||||
# Init kv cache connector here, because it requires
|
||||
# `kv_cache_config`.
|
||||
# NOTE(Kuntai): This needs to be done before `initialize_kv_cache`,
|
||||
|
||||
Reference in New Issue
Block a user