[Model] PP support for Mamba-like models (#10992)
Signed-off-by: mzusman <mor.zusmann@gmail.com>
This commit is contained in:
@@ -21,7 +21,7 @@ from vllm.logger import init_logger
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
from .adapters import as_embedding_model
|
||||
from .interfaces import (has_inner_state, is_attention_free,
|
||||
from .interfaces import (has_inner_state, is_attention_free, is_hybrid,
|
||||
supports_cross_encoding, supports_multimodal,
|
||||
supports_pp)
|
||||
from .interfaces_base import is_pooling_model, is_text_generation_model
|
||||
@@ -218,6 +218,7 @@ class _ModelInfo:
|
||||
supports_pp: bool
|
||||
has_inner_state: bool
|
||||
is_attention_free: bool
|
||||
is_hybrid: bool
|
||||
|
||||
@staticmethod
|
||||
def from_model_cls(model: Type[nn.Module]) -> "_ModelInfo":
|
||||
@@ -239,6 +240,7 @@ class _ModelInfo:
|
||||
supports_pp=supports_pp(model),
|
||||
has_inner_state=has_inner_state(model),
|
||||
is_attention_free=is_attention_free(model),
|
||||
is_hybrid=is_hybrid(model),
|
||||
)
|
||||
|
||||
|
||||
@@ -484,6 +486,13 @@ class _ModelRegistry:
|
||||
model_cls, _ = self.inspect_model_cls(architectures)
|
||||
return model_cls.is_attention_free
|
||||
|
||||
def is_hybrid_model(
|
||||
self,
|
||||
architectures: Union[str, List[str]],
|
||||
) -> bool:
|
||||
model_cls, _ = self.inspect_model_cls(architectures)
|
||||
return model_cls.is_hybrid
|
||||
|
||||
|
||||
ModelRegistry = _ModelRegistry({
|
||||
model_arch: _LazyRegisteredModel(
|
||||
|
||||
Reference in New Issue
Block a user