[Hybrid Allocator] Support KV cache groups with different block_size (#29143)

Signed-off-by: Yifan Qiao <yifanqiao@berkeley.edu>
Co-authored-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
Yifan Qiao
2025-11-25 07:30:57 -08:00
committed by GitHub
parent e502098643
commit 48ddb02b79
11 changed files with 472 additions and 87 deletions

View File

@@ -186,6 +186,7 @@ class Scheduler(SchedulerInterface):
enable_kv_cache_events=self.enable_kv_cache_events,
dcp_world_size=self.dcp_world_size,
pcp_world_size=self.pcp_world_size,
hash_block_size=self.block_size,
)
self.use_pp = self.parallel_config.pipeline_parallel_size > 1
self.use_v2_model_runner = envs.VLLM_USE_V2_MODEL_RUNNER