Add support for the Qwen3-Next model (a hybrid attention model). (#24526)

Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com> Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
2025-09-11 15:32:09 +08:00
parent 2048c4e379
commit e93f4cc9e3
29 changed files with 2476 additions and 61 deletions
--- a/vllm/v1/kv_cache_interface.py
+++ b/vllm/v1/kv_cache_interface.py
@@ -194,6 +194,7 @@ class MambaSpec(KVCacheSpec):
    dtypes: tuple[torch.dtype]
    page_size_padded: Optional[int] = None
    mamba_type: str = "mamba2"
+    num_speculative_blocks: int = 0

    @property
    def page_size_bytes(self) -> int: