[V1] Enable Mamba2 layers other than MambaMixer2 in the v1 engine (#20660)

Signed-off-by: nopperl <54780682+nopperl@users.noreply.github.com>
This commit is contained in:
nopperl
2025-07-11 14:53:31 +09:00
committed by GitHub
parent 31d5c1797f
commit 5d09152ff1
11 changed files with 68 additions and 45 deletions

View File

@@ -1331,6 +1331,17 @@ class ModelConfig:
return sum(t == 1 for t in attn_type_list[start:end])
def get_mamba_chunk_size(self) -> Optional[int]:
    """
    Look up the mamba chunk size on the HF text config.

    The attribute names are probed in priority order and the first one
    whose value is not ``None`` is returned. ``None`` is returned when
    neither attribute is present (or both are set to ``None``).
    """
    text_config = self.hf_text_config
    candidate_attrs = (
        # used by e.g. Bamba, FalconH1, Granite, PLaMo2
        "mamba_chunk_size",
        # used by e.g. Mamba2, NemotronH, Zamba
        "chunk_size",
    )
    for attr_name in candidate_attrs:
        value = getattr(text_config, attr_name, None)
        # Compare against None explicitly so a falsy value is still returned.
        if value is not None:
            return value
    return None
def get_multimodal_config(self) -> "MultiModalConfig":
"""
Get the multimodal configuration of the model.