[Core][Hybrid allocator + kv connector 1/n] Enable hybrid allocator + KV cache connector (#25712)
Signed-off-by: KuntaiDu <kuntai@uchicago.edu>
Signed-off-by: Kuntai Du <kuntai@uchicago.edu>
This commit is contained in:
@@ -27,6 +27,7 @@ def test_cpu_offloading(cpu_block_size: int) -> None:
model="meta-llama/Llama-3.2-1B-Instruct",
gpu_memory_utilization=0.5,
kv_transfer_config=kv_transfer_config,
disable_hybrid_kv_cache_manager=True,
)

prompts = ["Hi " * 100]
Reference in New Issue
Block a user