# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Tests for KV cache offloading configuration."""

import pytest

from vllm.config import CacheConfig, KVTransferConfig, ParallelConfig, VllmConfig

pytestmark = pytest.mark.cpu_test

# Multiplier used to express the native backend's expected sizes in bytes.
_GIB = 1 << 30


@pytest.mark.parametrize(
    "kv_offloading_backend,kv_offloading_size,tp,pp,expected_backend,expected_bytes",
    [
        # native, single rank: 4.0 GiB expressed in bytes
        ("native", 4.0, 1, 1, "OffloadingConnector", 4.0 * _GIB),
        # native, tp * pp = 4 ranks: the expected value is still the full
        # 8.0 GiB in bytes, i.e. it is not divided by the number of ranks
        ("native", 8.0, 2, 2, "OffloadingConnector", 8.0 * _GIB),
        # lmcache, single rank: size is passed through unchanged
        ("lmcache", 4.0, 1, 1, "LMCacheConnectorV1", 4.0),
        # lmcache, size per rank: 8.0 GiB / (2 * 2) = 2.0 GiB
        ("lmcache", 8.0, 2, 2, "LMCacheConnectorV1", 2.0),
        # When kv_offloading_size is None, offloading is disabled
        # (the backend is ignored)
        ("native", None, 1, 1, None, None),
    ],
)
def test_kv_connector(
    kv_offloading_backend, kv_offloading_size, tp, pp, expected_backend, expected_bytes
):
    """Check the KV transfer config produced for each offloading setup.

    Each case builds a ``VllmConfig`` from the given offloading backend/size
    and parallel sizes, then verifies the connector name, the role and the
    backend-specific entries of ``kv_connector_extra_config``.
    """
    # A pre-populated transfer config is only supplied when offloading is
    # expected to be enabled, so the disabled case starts from None.
    if expected_backend is None:
        initial_transfer_config = None
    else:
        initial_transfer_config = KVTransferConfig(
            kv_connector_extra_config={"existing_key": "existing_value"}
        )

    cache_config = CacheConfig(
        kv_offloading_backend=kv_offloading_backend,
        kv_offloading_size=kv_offloading_size,
    )
    parallel_config = ParallelConfig(
        tensor_parallel_size=tp, pipeline_parallel_size=pp
    )
    resolved = VllmConfig(
        cache_config=cache_config,
        kv_transfer_config=initial_transfer_config,
        parallel_config=parallel_config,
    ).kv_transfer_config

    # Offloading disabled: no KV transfer config should have been created.
    if expected_backend is None:
        assert resolved is None
        return

    extra_config = resolved.kv_connector_extra_config

    assert resolved.kv_connector == expected_backend
    assert resolved.kv_role == "kv_both"

    if kv_offloading_backend == "native":
        assert extra_config["cpu_bytes_to_use"] == expected_bytes
        # The entry supplied up front must still be present.
        assert extra_config["existing_key"] == "existing_value"
    elif kv_offloading_backend == "lmcache":
        assert extra_config["lmcache.local_cpu"] is True
        assert extra_config["lmcache.max_local_cpu_size"] == expected_bytes
        # The entry supplied up front must have been replaced.
        assert "existing_key" not in extra_config


def test_kv_offloading_size_only_uses_native_default():
    """Test that setting only kv_offloading_size enables native offloading."""
    # kv_offloading_backend is deliberately left unset; it should default
    # to "native".
    cache_config = CacheConfig(kv_offloading_size=4.0)
    resolved = VllmConfig(cache_config=cache_config).kv_transfer_config
    extra_config = resolved.kv_connector_extra_config

    assert resolved.kv_connector == "OffloadingConnector"
    assert resolved.kv_role == "kv_both"
    assert extra_config["cpu_bytes_to_use"] == 4.0 * _GIB