From ec51831a22cbb434646a5d8219c694ab15dbc4cb Mon Sep 17 00:00:00 2001
From: Harry Huang <huanghaoyan.hhy@alibaba-inc.com>
Date: Fri, 30 Jan 2026 12:40:19 +0800
Subject: [PATCH] [BugFix] Disable async scheduling for Mamba prefix caching
 (#33352)

Signed-off-by: huanghaoyan.hhy <huanghaoyan.hhy@alibaba-inc.com>
---
 vllm/config/vllm.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index 719b414b1..d9e91a2c2 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -619,6 +619,11 @@ class VllmConfig:
                     "`external_launcher` distributed executor backend, but you chose "
                     f"`{executor_backend}`."
                 )
+            if self.cache_config.mamba_cache_mode != "none":
+                raise ValueError(
+                    "Currently, async scheduling is not compatible with "
+                    "prefix caching for Mamba models."
+                )
         elif self.scheduler_config.async_scheduling is None:
             # Enable async scheduling unless there is an incompatible option.
             if (
@@ -651,6 +656,13 @@ class VllmConfig:
                     scope="local",
                 )
                 self.scheduler_config.async_scheduling = False
+            elif self.cache_config.mamba_cache_mode != "none":
+                logger.warning_once(
+                    "Async scheduling is not compatible with "
+                    "prefix caching for Mamba models and will be disabled.",
+                    scope="local",
+                )
+                self.scheduler_config.async_scheduling = False
             else:
                 self.scheduler_config.async_scheduling = True