[CLI][Doc] Formalize --mm-encoder-tp-mode (#23190)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -28,12 +28,12 @@ from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
|
||||
DeviceConfig, DistributedExecutorBackend,
|
||||
GuidedDecodingBackend, HfOverrides, KVEventsConfig,
|
||||
KVTransferConfig, LoadConfig, LogprobsMode,
|
||||
LoRAConfig, MambaDType, ModelConfig, ModelDType,
|
||||
ModelImpl, MultiModalConfig, ObservabilityConfig,
|
||||
ParallelConfig, PoolerConfig, PrefixCachingHashAlgo,
|
||||
RunnerOption, SchedulerConfig, SchedulerPolicy,
|
||||
SpeculativeConfig, TaskOption, TokenizerMode,
|
||||
VllmConfig, get_attr_docs, get_field)
|
||||
LoRAConfig, MambaDType, MMEncoderTPMode, ModelConfig,
|
||||
ModelDType, ModelImpl, MultiModalConfig,
|
||||
ObservabilityConfig, ParallelConfig, PoolerConfig,
|
||||
PrefixCachingHashAlgo, RunnerOption, SchedulerConfig,
|
||||
SchedulerPolicy, SpeculativeConfig, TaskOption,
|
||||
TokenizerMode, VllmConfig, get_attr_docs, get_field)
|
||||
from vllm.logger import init_logger
|
||||
from vllm.platforms import CpuArchEnum, current_platform
|
||||
from vllm.plugins import load_general_plugins
|
||||
@@ -352,6 +352,7 @@ class EngineArgs:
|
||||
MultiModalConfig.mm_processor_kwargs
|
||||
disable_mm_preprocessor_cache: bool = False # DEPRECATED
|
||||
mm_processor_cache_gb: int = MultiModalConfig.mm_processor_cache_gb
|
||||
mm_encoder_tp_mode: MMEncoderTPMode = MultiModalConfig.mm_encoder_tp_mode
|
||||
skip_mm_profiling: bool = MultiModalConfig.skip_mm_profiling
|
||||
# LoRA fields
|
||||
enable_lora: bool = False
|
||||
@@ -434,16 +435,14 @@ class EngineArgs:
|
||||
use_tqdm_on_load: bool = LoadConfig.use_tqdm_on_load
|
||||
pt_load_map_location: str = LoadConfig.pt_load_map_location
|
||||
|
||||
enable_multimodal_encoder_data_parallel: bool = \
|
||||
ParallelConfig.enable_multimodal_encoder_data_parallel
|
||||
# DEPRECATED
|
||||
enable_multimodal_encoder_data_parallel: bool = False
|
||||
|
||||
logits_processors: Optional[list[Union[
|
||||
str, type[LogitsProcessor]]]] = ModelConfig.logits_processors
|
||||
"""Custom logitproc types"""
|
||||
|
||||
async_scheduling: bool = SchedulerConfig.async_scheduling
|
||||
# DEPRECATED
|
||||
enable_prompt_adapter: bool = False
|
||||
|
||||
kv_sharing_fast_prefill: bool = \
|
||||
CacheConfig.kv_sharing_fast_prefill
|
||||
@@ -685,7 +684,8 @@ class EngineArgs:
|
||||
**parallel_kwargs["worker_extension_cls"])
|
||||
parallel_group.add_argument(
|
||||
"--enable-multimodal-encoder-data-parallel",
|
||||
**parallel_kwargs["enable_multimodal_encoder_data_parallel"])
|
||||
action="store_true",
|
||||
deprecated=True)
|
||||
|
||||
# KV cache arguments
|
||||
cache_kwargs = get_kwargs(CacheConfig)
|
||||
@@ -735,6 +735,8 @@ class EngineArgs:
|
||||
multimodal_group.add_argument("--disable-mm-preprocessor-cache",
|
||||
action="store_true",
|
||||
deprecated=True)
|
||||
multimodal_group.add_argument(
|
||||
"--mm-encoder-tp-mode", **multimodal_kwargs["mm_encoder_tp_mode"])
|
||||
multimodal_group.add_argument(
|
||||
"--interleave-mm-strings",
|
||||
**multimodal_kwargs["interleave_mm_strings"])
|
||||
@@ -909,6 +911,14 @@ class EngineArgs:
|
||||
|
||||
self.mm_processor_cache_gb = envs.VLLM_MM_INPUT_CACHE_GIB
|
||||
|
||||
if self.enable_multimodal_encoder_data_parallel:
|
||||
logger.warning(
|
||||
"--enable-multimodal-encoder-data-parallel` is deprecated "
|
||||
"and will be removed in v0.13. "
|
||||
"Please use `--mm-encoder-tp-mode data` instead.")
|
||||
|
||||
self.mm_encoder_tp_mode = "data"
|
||||
|
||||
return ModelConfig(
|
||||
model=self.model,
|
||||
hf_config_path=self.hf_config_path,
|
||||
@@ -947,6 +957,7 @@ class EngineArgs:
|
||||
config_format=self.config_format,
|
||||
mm_processor_kwargs=self.mm_processor_kwargs,
|
||||
mm_processor_cache_gb=self.mm_processor_cache_gb,
|
||||
mm_encoder_tp_mode=self.mm_encoder_tp_mode,
|
||||
override_neuron_config=self.override_neuron_config,
|
||||
override_pooler_config=self.override_pooler_config,
|
||||
logits_processor_pattern=self.logits_processor_pattern,
|
||||
@@ -1258,8 +1269,6 @@ class EngineArgs:
|
||||
distributed_executor_backend=self.distributed_executor_backend,
|
||||
worker_cls=self.worker_cls,
|
||||
worker_extension_cls=self.worker_extension_cls,
|
||||
enable_multimodal_encoder_data_parallel=self.
|
||||
enable_multimodal_encoder_data_parallel,
|
||||
)
|
||||
|
||||
if model_config.is_multimodal_model:
|
||||
|
||||
Reference in New Issue
Block a user