From ec51831a22cbb434646a5d8219c694ab15dbc4cb Mon Sep 17 00:00:00 2001 From: Harry Huang Date: Fri, 30 Jan 2026 12:40:19 +0800 Subject: [PATCH] [BugFix] Disable async scheduling for Mamba prefix caching (#33352) Signed-off-by: huanghaoyan.hhy --- vllm/config/vllm.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 719b414b1..d9e91a2c2 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -619,6 +619,11 @@ class VllmConfig: "`external_launcher` distributed executor backend, but you chose " f"`{executor_backend}`." ) + if self.cache_config.mamba_cache_mode != "none": + raise ValueError( + "Currently, async scheduling is not compatible with " + "prefix caching for Mamba models." + ) elif self.scheduler_config.async_scheduling is None: # Enable async scheduling unless there is an incompatible option. if ( @@ -651,6 +656,13 @@ class VllmConfig: scope="local", ) self.scheduler_config.async_scheduling = False + elif self.cache_config.mamba_cache_mode != "none": + logger.warning_once( + "Async scheduling is not compatible with " + "prefix caching for Mamba models and will be disabled.", + scope="local", + ) + self.scheduler_config.async_scheduling = False else: self.scheduler_config.async_scheduling = True