[VLM][Core] Support profiling with multiple multi-modal inputs per prompt (#7126)
@@ -11,14 +11,11 @@ logger = init_logger(__name__)
 
 @runtime_checkable
 class SupportsMultiModal(Protocol):
-    """
-    The interface required for all multimodal (vision or audio) language
-    models.
-    """
+    """The interface required for all multi-modal models."""
 
     supports_multimodal: ClassVar[Literal[True]] = True
     """
-    A flag that indicates this model supports multimodal inputs.
+    A flag that indicates this model supports multi-modal inputs.
 
     Note:
         There is no need to redefine this flag if this class is in the
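For context, the SupportsMultiModal protocol touched by this hunk is decorated with @runtime_checkable, so a model class that inherits it (or structurally provides the flag) can be detected with isinstance(). The sketch below is illustrative only, assuming nothing beyond what the hunk shows: MyVisionLanguageModel is a hypothetical class, not a vLLM model, and this is not vLLM's actual registry code.

# Illustrative sketch -- MyVisionLanguageModel is hypothetical, not part of vLLM.
# It shows why @runtime_checkable matters for this protocol.
from typing import ClassVar, Literal, Protocol, runtime_checkable


@runtime_checkable
class SupportsMultiModal(Protocol):
    """The interface required for all multi-modal models."""

    supports_multimodal: ClassVar[Literal[True]] = True


class MyVisionLanguageModel(SupportsMultiModal):
    # The flag is inherited from the protocol base, so (per the Note in the
    # diff) there is no need to redefine it here.
    pass


model = MyVisionLanguageModel()
# Structural check enabled by @runtime_checkable; data members are tested
# for presence via hasattr(), so the inherited flag satisfies the protocol.
assert isinstance(model, SupportsMultiModal)
assert model.supports_multimodal is True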