[Model] PP support for Mamba-like models (#10992)

Signed-off-by: mzusman <mor.zusmann@gmail.com>
This commit is contained in:
Mor Zusman
2024-12-11 04:53:37 +02:00
committed by GitHub
parent d5c5154fcf
commit ffa48c9146
11 changed files with 227 additions and 79 deletions

View File

@@ -21,7 +21,7 @@ from vllm.logger import init_logger
from vllm.platforms import current_platform
from .adapters import as_embedding_model
from .interfaces import (has_inner_state, is_attention_free,
from .interfaces import (has_inner_state, is_attention_free, is_hybrid,
supports_cross_encoding, supports_multimodal,
supports_pp)
from .interfaces_base import is_pooling_model, is_text_generation_model
@@ -218,6 +218,7 @@ class _ModelInfo:
supports_pp: bool
has_inner_state: bool
is_attention_free: bool
is_hybrid: bool
@staticmethod
def from_model_cls(model: Type[nn.Module]) -> "_ModelInfo":
@@ -239,6 +240,7 @@ class _ModelInfo:
supports_pp=supports_pp(model),
has_inner_state=has_inner_state(model),
is_attention_free=is_attention_free(model),
is_hybrid=is_hybrid(model),
)
@@ -484,6 +486,13 @@ class _ModelRegistry:
model_cls, _ = self.inspect_model_cls(architectures)
return model_cls.is_attention_free
def is_hybrid_model(
    self,
    architectures: Union[str, List[str]],
) -> bool:
    """Report the ``is_hybrid`` flag for the given architecture(s).

    The architectures are handed to ``self.inspect_model_cls``, which
    yields a pair; the first element is the inspected model info and the
    second element is not used here.

    Args:
        architectures: A single architecture name or a list of names,
            forwarded unchanged to ``inspect_model_cls``.

    Returns:
        The ``is_hybrid`` attribute of the inspected model info.
    """
    # Unpack exactly two values so that an unexpected result shape from
    # ``inspect_model_cls`` still fails loudly, as before.
    inspected_info, _resolved_arch = self.inspect_model_cls(architectures)
    hybrid_flag = inspected_info.is_hybrid
    return hybrid_flag
ModelRegistry = _ModelRegistry({
model_arch: _LazyRegisteredModel(