[Model Runner V2] Fix warmup for very small kvcache and/or blocksizes (#36176)

Signed-off-by: Nick Hill <nickhill123@gmail.com>
2026-03-05 14:48:10 -08:00
parent a97954b6a8
commit a73af584fe
2 changed files with 34 additions and 4 deletions
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -464,6 +464,10 @@ class Worker(WorkerBase):
    def initialize_from_config(self, kv_cache_config: KVCacheConfig) -> None:
        """Allocate GPU KV cache with the specified kv_cache_config."""

+        # Update local config with adjusted num blocks after profiling,
+        # so that it's available to the warmup stage.
+        self.cache_config.num_gpu_blocks = kv_cache_config.num_blocks
+
        # Init kv cache connector here, because it requires
        # `kv_cache_config`.
        # NOTE(Kuntai): This needs to be done before `initialize_kv_cache`,