Add support for the Qwen3-Next model (a hybrid attention model). (#24526)

Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Tao He
2025-09-11 15:32:09 +08:00
committed by GitHub
parent 2048c4e379
commit e93f4cc9e3
29 changed files with 2476 additions and 61 deletions

View File

@@ -194,6 +194,7 @@ class MambaSpec(KVCacheSpec):
dtypes: tuple[torch.dtype]
page_size_padded: Optional[int] = None
mamba_type: str = "mamba2"
num_speculative_blocks: int = 0
@property
def page_size_bytes(self) -> int: