[KV offload] Enable CPU KV offload on CUDA-like platforms (#27770)

Signed-off-by: zhewenli <zhewenli@meta.com>
This commit is contained in:
Zhewen Li
2025-10-30 07:10:29 -07:00
committed by GitHub
parent 4e68cc9b6a
commit 0fe0140408
2 changed files with 2 additions and 6 deletions

View File

@@ -12,7 +12,6 @@ from tqdm import tqdm
from vllm import LLM, SamplingParams, TokensPrompt
from vllm.config import KVEventsConfig, KVTransferConfig
from vllm.distributed.kv_events import BlockStored, KVEventBatch
from vllm.platforms import current_platform
CPU_BLOCK_SIZES = [16, 48]
@@ -64,9 +63,6 @@ class MockSubscriber:
self.sub.close()
@pytest.mark.skipif(
not current_platform.is_cuda(), reason="CPU offloading only supported on CUDA"
)
@pytest.mark.parametrize("cpu_block_size", CPU_BLOCK_SIZES)
def test_cpu_offloading(cpu_block_size: int) -> None:
"""