[Hybrid] Enable mamba prefix cache "align" mode with async scheduling (#33997)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
This commit is contained in:
Authored by Thomas Parnell on 2026-02-14 22:15:56 +01:00; committed by GitHub
parent 73391a1baa
commit d5fe3f702c
4 changed files with 77 additions and 32 deletions

View File

@@ -648,11 +648,6 @@ class VllmConfig:
"`external_launcher` distributed executor backend, but you chose "
f"`{executor_backend}`."
)
if self.cache_config.mamba_cache_mode != "none":
raise ValueError(
"Currently, async scheduling is not compatible with "
"prefix caching for Mamba models."
)
elif self.scheduler_config.async_scheduling is None:
# Enable async scheduling unless there is an incompatible option.
if (
@@ -685,13 +680,6 @@ class VllmConfig:
scope="local",
)
self.scheduler_config.async_scheduling = False
elif self.cache_config.mamba_cache_mode != "none":
logger.warning_once(
"Async scheduling is not compatible with "
"prefix caching for Mamba models and will be disabled.",
scope="local",
)
self.scheduler_config.async_scheduling = False
else:
self.scheduler_config.async_scheduling = True