[Hybrid] Enable spec decoding in mamba cache align mode (#33705)
Signed-off-by: huanghaoyan.hhy <huanghaoyan.hhy@alibaba-inc.com>
This commit is contained in:
@@ -354,10 +354,6 @@ class MambaModelConfig(VerifyAndUpdateConfig):
 assert vllm_config.scheduler_config.enable_chunked_prefill, (
 "Chunked prefill is required for mamba cache mode 'align'."
 )
-assert not vllm_config.speculative_config, (
-"Mamba cache mode 'align' is currently not compatible "
-"with speculative decoding."
-)
 logger.info(
 "Warning: Prefix caching in Mamba cache '%s' "
 "mode is currently enabled. "
Reference in New Issue
Block a user