[Core][Hybrid allocator + kv connector 1/n] Enable hybrid allocator + KV cache connector (#25712)

Signed-off-by: KuntaiDu <kuntai@uchicago.edu>
Signed-off-by: Kuntai Du <kuntai@uchicago.edu>
This commit is contained in:
Kuntai Du
2025-10-24 23:34:18 -07:00
committed by GitHub
parent 56ed7609a9
commit b853540388
15 changed files with 113 additions and 18 deletions

View File

@@ -46,6 +46,7 @@ def create_scheduler(
num_speculative_tokens: int | None = None,
skip_tokenizer_init: bool = False,
async_scheduling: bool = False,
disable_hybrid_kv_cache_manager: bool = False,
) -> Scheduler | AsyncScheduler:
"""Create scheduler under test.
@@ -70,6 +71,7 @@ def create_scheduler(
disable_chunked_mm_input=disable_chunked_mm_input,
enable_chunked_prefill=True,
async_scheduling=async_scheduling,
disable_hybrid_kv_cache_manager=disable_hybrid_kv_cache_manager,
)
model_config = ModelConfig(
model=model,