[Core] Use key-only cache for BaseMultiModalProcessor (#23018)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Author: Cyrus Leung
Date: 2025-08-27 14:19:13 +08:00
Committed by: GitHub
Parent commit: 8dbf6ed7be
Commit: 69244e67e6
29 changed files with 954 additions and 394 deletions

View File

@@ -437,7 +437,7 @@ class ModelConfig:
from `AutoProcessor.from_pretrained`. The available overrides depend on the
model that is being run. For example, for Phi-3-Vision: `{"num_crops": 4}`.
"""
mm_processor_cache_gb: int = 4
mm_processor_cache_gb: float = 4
"""The size (in GiB) of the multi-modal processor cache, which is used to
avoid re-processing past multi-modal inputs.
@@ -884,12 +884,6 @@ class ModelConfig:
return None
def set_mm_processor_cache_gb(self, value: int) -> None:
mm_config = self.get_multimodal_config()
self.mm_processor_cache_gb = value
mm_config.mm_processor_cache_gb = value
def _get_encoder_config(self):
return get_sentence_transformer_tokenizer_config(
self.model, self.revision)
@@ -1697,22 +1691,6 @@ class ModelConfig:
def is_multimodal_model(self) -> bool:
return self.multimodal_config is not None
@property
def enable_mm_processor_cache(self) -> bool:
"""Whether the multi-modal processor cache should be enabled."""
mm_config = self.multimodal_config
if mm_config is None:
return False
return mm_config.mm_processor_cache_gb > 0
def get_mm_input_cache_gb(self) -> int:
mm_config = self.multimodal_config
if mm_config is None:
return 0
return envs.VLLM_MM_INPUT_CACHE_GIB
@property
def is_cross_encoder(self) -> bool:
return (self._model_info.supports_cross_encoding
@@ -2561,7 +2539,7 @@ class MultiModalConfig:
`{"num_crops": 4}`.
"""
mm_processor_cache_gb: int = 4
mm_processor_cache_gb: float = 4
"""
The size (in GiB) of the multi-modal processor cache, which is used to