Add support for the Qwen3-Next model (a hybrid attention model). (#24526)
Signed-off-by: Tao He <linzhu.ht@alibaba-inc.com> Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -194,6 +194,7 @@ class MambaSpec(KVCacheSpec):
|
||||
dtypes: tuple[torch.dtype]
|
||||
page_size_padded: Optional[int] = None
|
||||
mamba_type: str = "mamba2"
|
||||
num_speculative_blocks: int = 0
|
||||
|
||||
@property
|
||||
def page_size_bytes(self) -> int:
|
||||
|
||||
Reference in New Issue
Block a user