[Hybrid] Enable mamba prefix cache "align" mode with async scheduling (#33997)
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
This commit is contained in:
@@ -648,11 +648,6 @@ class VllmConfig:
                     "`external_launcher` distributed executor backend, but you chose "
                     f"`{executor_backend}`."
                 )
-            if self.cache_config.mamba_cache_mode != "none":
-                raise ValueError(
-                    "Currently, async scheduling is not compatible with "
-                    "prefix caching for Mamba models."
-                )
         elif self.scheduler_config.async_scheduling is None:
             # Enable async scheduling unless there is an incompatible option.
             if (
@@ -685,13 +680,6 @@ class VllmConfig:
                     scope="local",
                 )
                 self.scheduler_config.async_scheduling = False
-            elif self.cache_config.mamba_cache_mode != "none":
-                logger.warning_once(
-                    "Async scheduling is not compatible with "
-                    "prefix caching for Mamba models and will be disabled.",
-                    scope="local",
-                )
-                self.scheduler_config.async_scheduling = False
             else:
                 self.scheduler_config.async_scheduling = True
 
Reference in New Issue
Block a user