diff --git a/vllm/distributed/kv_transfer/kv_connector/utils.py b/vllm/distributed/kv_transfer/kv_connector/utils.py index 019201ede..f9367da73 100644 --- a/vllm/distributed/kv_transfer/kv_connector/utils.py +++ b/vllm/distributed/kv_transfer/kv_connector/utils.py @@ -324,6 +324,7 @@ class TpKVTopology: kv_cache_shape = self.attn_backend.get_kv_cache_shape( num_blocks=1, block_size=_MOCK_BLOCK_SIZE, num_kv_heads=1, head_size=1 ) + logger.debug("Test kv_cache_shape: %s", kv_cache_shape) # Non-MLA backends caches have 5 dims [2, num_blocks, H,N,D], # we just mock num_blocks to 1 for the dimension check below. self._is_kv_layout_blocks_first = ( @@ -337,6 +338,7 @@ class TpKVTopology: ) if self._cross_layers_blocks: + logger.debug("Using cross-layer KV cache") # prepend layers dimension _MOCK_NUM_LAYERS = 80 kv_cache_shape = (_MOCK_NUM_LAYERS,) + kv_cache_shape diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index 8ce939ee4..3a8400447 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -1354,6 +1354,9 @@ class NixlConnectorWorker: if base_addr in seen_base_addresses: continue + logger.debug( + "Registering layer %s with cache shape: %s", layer_name, cache.shape + ) kernel_block_size = cache.shape[self.kv_topo.block_size_position] if self.block_size != kernel_block_size: logger.info_once(