[Core] Use key-only cache for BaseMultiModalProcessor (#23018)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -437,7 +437,7 @@ class ModelConfig:
|
||||
from `AutoProcessor.from_pretrained`. The available overrides depend on the
|
||||
model that is being run. For example, for Phi-3-Vision: `{"num_crops": 4}`.
|
||||
"""
|
||||
mm_processor_cache_gb: int = 4
|
||||
mm_processor_cache_gb: float = 4
|
||||
"""The size (in GiB) of the multi-modal processor cache, which is used to
|
||||
avoid re-processing past multi-modal inputs.
|
||||
|
||||
@@ -884,12 +884,6 @@ class ModelConfig:
|
||||
|
||||
return None
|
||||
|
||||
def set_mm_processor_cache_gb(self, value: int) -> None:
|
||||
mm_config = self.get_multimodal_config()
|
||||
|
||||
self.mm_processor_cache_gb = value
|
||||
mm_config.mm_processor_cache_gb = value
|
||||
|
||||
def _get_encoder_config(self):
|
||||
return get_sentence_transformer_tokenizer_config(
|
||||
self.model, self.revision)
|
||||
@@ -1697,22 +1691,6 @@ class ModelConfig:
|
||||
def is_multimodal_model(self) -> bool:
|
||||
return self.multimodal_config is not None
|
||||
|
||||
@property
|
||||
def enable_mm_processor_cache(self) -> bool:
|
||||
"""Whether the multi-modal processor cache should be enabled."""
|
||||
mm_config = self.multimodal_config
|
||||
if mm_config is None:
|
||||
return False
|
||||
|
||||
return mm_config.mm_processor_cache_gb > 0
|
||||
|
||||
def get_mm_input_cache_gb(self) -> int:
|
||||
mm_config = self.multimodal_config
|
||||
if mm_config is None:
|
||||
return 0
|
||||
|
||||
return envs.VLLM_MM_INPUT_CACHE_GIB
|
||||
|
||||
@property
|
||||
def is_cross_encoder(self) -> bool:
|
||||
return (self._model_info.supports_cross_encoding
|
||||
@@ -2561,7 +2539,7 @@ class MultiModalConfig:
|
||||
`{"num_crops": 4}`.
|
||||
"""
|
||||
|
||||
mm_processor_cache_gb: int = 4
|
||||
mm_processor_cache_gb: float = 4
|
||||
"""
|
||||
The size (in GiB) of the multi-modal processor cache, which is used to
|
||||
|
||||
|
||||
Reference in New Issue
Block a user