[MODEL] Adding Support for Qwen3.5 Models (#34110)
Signed-off-by: JJJYmmm <1650675829@qq.com>
Signed-off-by: JJJYmmm <92386084+JJJYmmm@users.noreply.github.com>
Signed-off-by: Roger Wang <hey@rogerw.io>
Co-authored-by: wulipc <wulipc@users.noreply.github.com>
Co-authored-by: ywang96 <ywang96@users.noreply.github.com>
Co-authored-by: Isotr0py <Isotr0py@users.noreply.github.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Roger Wang <hey@rogerw.io>
@@ -43,7 +43,8 @@ class MambaBase(AttentionLayerBase):
     def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:
         if (
             vllm_config.speculative_config is not None
-            and vllm_config.model_config.hf_config.model_type not in ["qwen3_next"]
+            and vllm_config.model_config.hf_config.model_type
+            not in ["qwen3_next", "qwen3_5", "qwen3_5_moe"]
         ):
             raise NotImplementedError(
                 "Mamba with speculative decoding is not supported yet."
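For context, the hunk above widens the allowlist of model types for which Mamba layers accept speculative decoding, adding "qwen3_5" and "qwen3_5_moe" alongside "qwen3_next". The following is a minimal sketch of that guard logic, not vLLM's actual API: mamba_allows_spec_decode is a hypothetical helper, and the SimpleNamespace objects stand in for vLLM's real VllmConfig / model_config / hf_config classes.

from types import SimpleNamespace

def mamba_allows_spec_decode(vllm_config) -> bool:
    # Hypothetical helper mirroring the check in MambaBase.get_kv_cache_spec:
    # with speculative decoding enabled, only the allowlisted model types
    # avoid the NotImplementedError.
    if vllm_config.speculative_config is None:
        return True
    return vllm_config.model_config.hf_config.model_type in [
        "qwen3_next",
        "qwen3_5",
        "qwen3_5_moe",
    ]

# Illustrative stand-in config: a qwen3_5 model with speculative decoding
# enabled now passes the check after this change.
cfg = SimpleNamespace(
    speculative_config=object(),
    model_config=SimpleNamespace(
        hf_config=SimpleNamespace(model_type="qwen3_5")
    ),
)
assert mamba_allows_spec_decode(cfg)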