EVS Support (Video token pruning) (#22980)

Signed-off-by: Eugene Khvedchenia <ekhvedchenia@nvidia.com>
Signed-off-by: Eugene Khvedchenya <ekhvedchenya@gmail.com>
Co-authored-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
Eugene Khvedchenya
2025-09-26 06:54:54 +03:00
committed by GitHub
parent 983056e456
commit 392edee34a
8 changed files with 783 additions and 39 deletions

View File

@@ -283,6 +283,7 @@ class ModelConfig:
mm_encoder_tp_mode: InitVar[Optional[MMEncoderTPMode]] = None
interleave_mm_strings: InitVar[Optional[bool]] = None
skip_mm_profiling: InitVar[Optional[bool]] = None
video_pruning_rate: InitVar[Optional[float]] = None
def compute_hash(self) -> str:
"""
@@ -311,6 +312,7 @@ class ModelConfig:
factors.append(self.override_generation_config)
factors.append(self.rope_scaling)
factors.append(self.rope_theta)
factors.append(self.video_pruning_rate)
# hf_config can control how the model looks!
try:
@@ -338,17 +340,19 @@ class ModelConfig:
return hashlib.sha256(str(factors).encode()).hexdigest()
def __post_init__(
self,
# Multimodal config init vars
limit_mm_per_prompt: Optional[dict[str, int]],
media_io_kwargs: Optional[dict[str, dict[str, Any]]],
mm_processor_kwargs: Optional[dict[str, Any]],
mm_processor_cache_gb: Optional[float],
mm_processor_cache_type: Optional[MMCacheType],
mm_shm_cache_max_object_size_mb: Optional[int],
mm_encoder_tp_mode: Optional[MMEncoderTPMode],
interleave_mm_strings: Optional[bool],
skip_mm_profiling: Optional[bool]) -> None:
self,
# Multimodal config init vars
limit_mm_per_prompt: Optional[dict[str, int]],
media_io_kwargs: Optional[dict[str, dict[str, Any]]],
mm_processor_kwargs: Optional[dict[str, Any]],
mm_processor_cache_gb: Optional[float],
mm_processor_cache_type: Optional[MMCacheType],
mm_shm_cache_max_object_size_mb: Optional[int],
mm_encoder_tp_mode: Optional[MMEncoderTPMode],
interleave_mm_strings: Optional[bool],
skip_mm_profiling: Optional[bool],
video_pruning_rate: Optional[float],
) -> None:
# Set the default seed to 0 in V1.
# NOTE(woosuk): In V0, we set the default seed to None because the
# driver worker shares the same process as the user process, and thus
@@ -612,6 +616,7 @@ class ModelConfig:
mm_encoder_tp_mode=mm_encoder_tp_mode,
interleave_mm_strings=interleave_mm_strings,
skip_mm_profiling=skip_mm_profiling,
video_pruning_rate=video_pruning_rate,
)
mm_config_kwargs = {