[VLM][Core] Support profiling with multiple multi-modal inputs per prompt (#7126)

This commit is contained in:
Cyrus Leung
2024-08-15 01:55:42 +08:00
committed by GitHub
parent 70b746efcf
commit 3f674a49b5
38 changed files with 572 additions and 216 deletions

View File

@@ -11,14 +11,11 @@ logger = init_logger(__name__)
@runtime_checkable
class SupportsMultiModal(Protocol):
-"""
-The interface required for all multimodal (vision or audio) language
-models.
-"""
+"""The interface required for all multi-modal models."""
supports_multimodal: ClassVar[Literal[True]] = True
"""
-A flag that indicates this model supports multimodal inputs.
+A flag that indicates this model supports multi-modal inputs.
Note:
There is no need to redefine this flag if this class is in the