[Core][Hybrid allocator + kv connector 1/n] Enable hybrid allocator + KV cache connector (#25712)

Signed-off-by: KuntaiDu <kuntai@uchicago.edu>
Signed-off-by: Kuntai Du <kuntai@uchicago.edu>
This commit is contained in:
Kuntai Du
2025-10-24 23:34:18 -07:00
committed by GitHub
parent 56ed7609a9
commit b853540388
15 changed files with 113 additions and 18 deletions

View File

@@ -27,6 +27,7 @@ def test_cpu_offloading(cpu_block_size: int) -> None:
model="meta-llama/Llama-3.2-1B-Instruct",
gpu_memory_utilization=0.5,
kv_transfer_config=kv_transfer_config,
disable_hybrid_kv_cache_manager=True,
)
prompts = ["Hi " * 100]