[Core] Shared memory based object store for Multimodal data caching and IPC (#20452)

Signed-off-by: donglu <donglu@cohere.com>
This commit is contained in:
dongluw
2025-09-12 10:54:17 -04:00
committed by GitHub
parent 9f04d9d55f
commit a5b84f1cbf
17 changed files with 1487 additions and 27 deletions

View File

@@ -27,8 +27,8 @@ from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
DistributedExecutorBackend, EPLBConfig,
GuidedDecodingBackend, HfOverrides, KVEventsConfig,
KVTransferConfig, LoadConfig, LogprobsMode,
LoRAConfig, MambaDType, MMEncoderTPMode, ModelConfig,
ModelDType, ModelImpl, MultiModalConfig,
LoRAConfig, MambaDType, MMCacheType, MMEncoderTPMode,
ModelConfig, ModelDType, ModelImpl, MultiModalConfig,
ObservabilityConfig, ParallelConfig, PoolerConfig,
PrefixCachingHashAlgo, RunnerOption, SchedulerConfig,
SchedulerPolicy, SpeculativeConfig, TaskOption,
@@ -373,6 +373,10 @@ class EngineArgs:
MultiModalConfig.mm_processor_kwargs
disable_mm_preprocessor_cache: bool = False # DEPRECATED
mm_processor_cache_gb: float = MultiModalConfig.mm_processor_cache_gb
mm_processor_cache_type: Optional[MMCacheType] = \
MultiModalConfig.mm_processor_cache_type
mm_shm_cache_max_object_size_mb: int = \
MultiModalConfig.mm_shm_cache_max_object_size_mb
mm_encoder_tp_mode: MMEncoderTPMode = MultiModalConfig.mm_encoder_tp_mode
io_processor_plugin: Optional[str] = None
skip_mm_profiling: bool = MultiModalConfig.skip_mm_profiling
@@ -782,6 +786,12 @@ class EngineArgs:
multimodal_group.add_argument("--disable-mm-preprocessor-cache",
action="store_true",
deprecated=True)
multimodal_group.add_argument(
"--mm-processor-cache-type",
**multimodal_kwargs["mm_processor_cache_type"])
multimodal_group.add_argument(
"--mm-shm-cache-max-object-size-mb",
**multimodal_kwargs["mm_shm_cache_max_object_size_mb"])
multimodal_group.add_argument(
"--mm-encoder-tp-mode", **multimodal_kwargs["mm_encoder_tp_mode"])
multimodal_group.add_argument(
@@ -998,6 +1008,9 @@ class EngineArgs:
config_format=self.config_format,
mm_processor_kwargs=self.mm_processor_kwargs,
mm_processor_cache_gb=self.mm_processor_cache_gb,
mm_processor_cache_type=self.mm_processor_cache_type,
mm_shm_cache_max_object_size_mb=self.
mm_shm_cache_max_object_size_mb,
mm_encoder_tp_mode=self.mm_encoder_tp_mode,
override_pooler_config=self.override_pooler_config,
logits_processor_pattern=self.logits_processor_pattern,