[BugFix] Disable async scheduling for Mamba prefix caching (#33352)

Signed-off-by: huanghaoyan.hhy <huanghaoyan.hhy@alibaba-inc.com>
This commit is contained in:
Harry Huang
2026-01-30 12:40:19 +08:00
committed by GitHub
parent 80b918f2bd
commit ec51831a22

View File

@@ -619,6 +619,11 @@ class VllmConfig:
"`external_launcher` distributed executor backend, but you chose "
f"`{executor_backend}`."
)
if self.cache_config.mamba_cache_mode != "none":
raise ValueError(
"Currently, async scheduling is not compatible with "
"prefix caching for Mamba models."
)
elif self.scheduler_config.async_scheduling is None:
# Enable async scheduling unless there is an incompatible option.
if (
@@ -651,6 +656,13 @@ class VllmConfig:
scope="local",
)
self.scheduler_config.async_scheduling = False
elif self.cache_config.mamba_cache_mode != "none":
logger.warning_once(
"Async scheduling is not compatible with "
"prefix caching for Mamba models and will be disabled.",
scope="local",
)
self.scheduler_config.async_scheduling = False
else:
self.scheduler_config.async_scheduling = True