[MODEL] Adding Support for Qwen3.5 Models (#34110)
Signed-off-by: JJJYmmm <1650675829@qq.com>
Signed-off-by: JJJYmmm <92386084+JJJYmmm@users.noreply.github.com>
Signed-off-by: Roger Wang <hey@rogerw.io>
Co-authored-by: wulipc <wulipc@users.noreply.github.com>
Co-authored-by: ywang96 <ywang96@users.noreply.github.com>
Co-authored-by: Isotr0py <Isotr0py@users.noreply.github.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Roger Wang <hey@rogerw.io>
@@ -43,7 +43,8 @@ class MambaBase(AttentionLayerBase):
     def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:
         if (
             vllm_config.speculative_config is not None
-            and vllm_config.model_config.hf_config.model_type not in ["qwen3_next"]
+            and vllm_config.model_config.hf_config.model_type
+            not in ["qwen3_next", "qwen3_5", "qwen3_5_moe"]
         ):
             raise NotImplementedError(
                 "Mamba with speculative decoding is not supported yet."
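For context, the hunk above widens the allowlist of model types for which Mamba layers accept speculative decoding, adding "qwen3_5" and "qwen3_5_moe" alongside "qwen3_next". The following is a minimal sketch of that guard logic, not vLLM's actual API: mamba_allows_spec_decode is a hypothetical helper, and the SimpleNamespace objects stand in for vLLM's real VllmConfig / model_config / hf_config classes.

from types import SimpleNamespace

def mamba_allows_spec_decode(vllm_config) -> bool:
    # Hypothetical helper mirroring the check in MambaBase.get_kv_cache_spec:
    # with speculative decoding enabled, only the allowlisted model types
    # avoid the NotImplementedError.
    if vllm_config.speculative_config is None:
        return True
    return vllm_config.model_config.hf_config.model_type in [
        "qwen3_next",
        "qwen3_5",
        "qwen3_5_moe",
    ]

# Illustrative stand-in config: a qwen3_5 model with speculative decoding
# enabled now passes the check after this change.
cfg = SimpleNamespace(
    speculative_config=object(),
    model_config=SimpleNamespace(
        hf_config=SimpleNamespace(model_type="qwen3_5")
    ),
)
assert mamba_allows_spec_decode(cfg)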