[Core] Simplify core kv-cache blocks initialization logic (#36521)

Signed-off-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
Nick Hill
2026-03-10 13:20:02 -07:00
committed by GitHub
parent 2a68464c5b
commit 65b2f405dc
4 changed files with 28 additions and 37 deletions

View File

@@ -88,9 +88,15 @@ def can_initialize(
[10 * GiB_bytes],
)
scheduler_kv_cache_config = generate_scheduler_kv_cache_config(kv_cache_configs)
vllm_config.cache_config.num_gpu_blocks = scheduler_kv_cache_config.num_blocks
kv_cache_groups = scheduler_kv_cache_config.kv_cache_groups
if kv_cache_groups:
vllm_config.cache_config.block_size = min(
g.kv_cache_spec.block_size for g in kv_cache_groups
)
# gpu_blocks (> 0), cpu_blocks, scheduler_kv_cache_config
return 1, 0, scheduler_kv_cache_config
vllm_config.validate_block_size()
return scheduler_kv_cache_config
if model_arch == "MiniMaxVL01ForConditionalGeneration":
pytest.skip(