EVS Support (Video tokens pruning) (#22980)
Signed-off-by: Eugene Khvedchenia <ekhvedchenia@nvidia.com> Signed-off-by: Eugene Khvedchenya <ekhvedchenya@gmail.com> Co-authored-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
committed by
GitHub
parent
983056e456
commit
392edee34a
@@ -78,6 +78,11 @@ class MultiModalConfig:
|
||||
This reduces engine startup time but shifts the responsibility to users for
|
||||
estimating the peak memory usage of the activation of multimodal encoder and
|
||||
embedding cache."""
|
||||
video_pruning_rate: Optional[float] = None
|
||||
"""Sets pruning rate for video pruning via Efficient Video Sampling.
|
||||
Value sits in range [0;1) and determines fraction of media tokens
|
||||
from each video to be pruned.
|
||||
"""
|
||||
|
||||
def compute_hash(self) -> str:
|
||||
"""
|
||||
@@ -118,3 +123,7 @@ class MultiModalConfig:
|
||||
"""
|
||||
kwargs = self.mm_processor_kwargs or {}
|
||||
return kwargs | dict(inference_kwargs)
|
||||
|
||||
def is_multimodal_pruning_enabled(self) -> bool:
    """Return whether video token pruning is switched on.

    Pruning counts as enabled only when ``video_pruning_rate`` has been
    set (is not ``None``) and is strictly greater than zero; an unset
    rate or a rate of ``0`` both report ``False``.
    """
    # The explicit ``None`` check must come first: comparing ``None > 0``
    # would raise a TypeError.
    return (self.video_pruning_rate is not None
            and self.video_pruning_rate > 0)
|
||||
|
||||
Reference in New Issue
Block a user