[Deprecation] Remove deprecated task, seed and MM settings (#30397)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -71,7 +71,6 @@ from vllm.config.model import (
|
||||
LogprobsMode,
|
||||
ModelDType,
|
||||
RunnerOption,
|
||||
TaskOption,
|
||||
TokenizerMode,
|
||||
)
|
||||
from vllm.config.multimodal import MMCacheType, MMEncoderTPMode
|
||||
@@ -360,7 +359,6 @@ class EngineArgs:
|
||||
hf_config_path: str | None = ModelConfig.hf_config_path
|
||||
runner: RunnerOption = ModelConfig.runner
|
||||
convert: ConvertOption = ModelConfig.convert
|
||||
task: TaskOption | None = ModelConfig.task
|
||||
skip_tokenizer_init: bool = ModelConfig.skip_tokenizer_init
|
||||
enable_prompt_embeds: bool = ModelConfig.enable_prompt_embeds
|
||||
tokenizer_mode: TokenizerMode | str = ModelConfig.tokenizer_mode
|
||||
@@ -373,7 +371,7 @@ class EngineArgs:
|
||||
config_format: str = ModelConfig.config_format
|
||||
dtype: ModelDType = ModelConfig.dtype
|
||||
kv_cache_dtype: CacheDType = CacheConfig.cache_dtype
|
||||
seed: int | None = 0
|
||||
seed: int = ModelConfig.seed
|
||||
max_model_len: int | None = ModelConfig.max_model_len
|
||||
cudagraph_capture_sizes: list[int] | None = (
|
||||
CompilationConfig.cudagraph_capture_sizes
|
||||
@@ -462,7 +460,6 @@ class EngineArgs:
|
||||
MultiModalConfig, "media_io_kwargs"
|
||||
)
|
||||
mm_processor_kwargs: dict[str, Any] | None = MultiModalConfig.mm_processor_kwargs
|
||||
disable_mm_preprocessor_cache: bool = False # DEPRECATED
|
||||
mm_processor_cache_gb: float = MultiModalConfig.mm_processor_cache_gb
|
||||
mm_processor_cache_type: MMCacheType | None = (
|
||||
MultiModalConfig.mm_processor_cache_type
|
||||
@@ -558,9 +555,6 @@ class EngineArgs:
|
||||
use_tqdm_on_load: bool = LoadConfig.use_tqdm_on_load
|
||||
pt_load_map_location: str = LoadConfig.pt_load_map_location
|
||||
|
||||
# DEPRECATED
|
||||
enable_multimodal_encoder_data_parallel: bool = False
|
||||
|
||||
logits_processors: list[str | type[LogitsProcessor]] | None = (
|
||||
ModelConfig.logits_processors
|
||||
)
|
||||
@@ -628,7 +622,6 @@ class EngineArgs:
|
||||
model_group.add_argument("--model", **model_kwargs["model"])
|
||||
model_group.add_argument("--runner", **model_kwargs["runner"])
|
||||
model_group.add_argument("--convert", **model_kwargs["convert"])
|
||||
model_group.add_argument("--task", **model_kwargs["task"], deprecated=True)
|
||||
model_group.add_argument("--tokenizer", **model_kwargs["tokenizer"])
|
||||
model_group.add_argument("--tokenizer-mode", **model_kwargs["tokenizer_mode"])
|
||||
model_group.add_argument(
|
||||
@@ -882,11 +875,6 @@ class EngineArgs:
|
||||
parallel_group.add_argument(
|
||||
"--worker-extension-cls", **parallel_kwargs["worker_extension_cls"]
|
||||
)
|
||||
parallel_group.add_argument(
|
||||
"--enable-multimodal-encoder-data-parallel",
|
||||
action="store_true",
|
||||
deprecated=True,
|
||||
)
|
||||
|
||||
# KV cache arguments
|
||||
cache_kwargs = get_kwargs(CacheConfig)
|
||||
@@ -960,9 +948,6 @@ class EngineArgs:
|
||||
multimodal_group.add_argument(
|
||||
"--mm-processor-cache-gb", **multimodal_kwargs["mm_processor_cache_gb"]
|
||||
)
|
||||
multimodal_group.add_argument(
|
||||
"--disable-mm-preprocessor-cache", action="store_true", deprecated=True
|
||||
)
|
||||
multimodal_group.add_argument(
|
||||
"--mm-processor-cache-type", **multimodal_kwargs["mm_processor_cache_type"]
|
||||
)
|
||||
@@ -1192,62 +1177,20 @@ class EngineArgs:
|
||||
if is_gguf(self.model):
|
||||
self.quantization = self.load_format = "gguf"
|
||||
|
||||
# NOTE(woosuk): In V1, we use separate processes for workers (unless
|
||||
# VLLM_ENABLE_V1_MULTIPROCESSING=0), so setting a seed here
|
||||
# doesn't affect the user process.
|
||||
if self.seed is None:
|
||||
logger.warning_once(
|
||||
"`seed=None` is equivalent to `seed=0` in V1 Engine. "
|
||||
"You will no longer be allowed to pass `None` in v0.13.",
|
||||
scope="local",
|
||||
if not envs.VLLM_ENABLE_V1_MULTIPROCESSING:
|
||||
logger.warning(
|
||||
"The global random seed is set to %d. Since "
|
||||
"VLLM_ENABLE_V1_MULTIPROCESSING is set to False, this may "
|
||||
"affect the random state of the Python process that "
|
||||
"launched vLLM.",
|
||||
self.seed,
|
||||
)
|
||||
|
||||
self.seed = 0
|
||||
if not envs.VLLM_ENABLE_V1_MULTIPROCESSING:
|
||||
logger.warning(
|
||||
"The global random seed is set to %d. Since "
|
||||
"VLLM_ENABLE_V1_MULTIPROCESSING is set to False, this may "
|
||||
"affect the random state of the Python process that "
|
||||
"launched vLLM.",
|
||||
self.seed,
|
||||
)
|
||||
|
||||
if self.disable_mm_preprocessor_cache:
|
||||
logger.warning_once(
|
||||
"`--disable-mm-preprocessor-cache` is deprecated "
|
||||
"and will be removed in v0.13. "
|
||||
"Please use `--mm-processor-cache-gb 0` instead.",
|
||||
scope="local",
|
||||
)
|
||||
|
||||
self.mm_processor_cache_gb = 0
|
||||
elif envs.VLLM_MM_INPUT_CACHE_GIB != 4:
|
||||
logger.warning_once(
|
||||
"VLLM_MM_INPUT_CACHE_GIB` is deprecated "
|
||||
"and will be removed in v0.13. "
|
||||
"Please use `--mm-processor-cache-gb %d` instead.",
|
||||
envs.VLLM_MM_INPUT_CACHE_GIB,
|
||||
scope="local",
|
||||
)
|
||||
|
||||
self.mm_processor_cache_gb = envs.VLLM_MM_INPUT_CACHE_GIB
|
||||
|
||||
if self.enable_multimodal_encoder_data_parallel:
|
||||
logger.warning_once(
|
||||
"--enable-multimodal-encoder-data-parallel` is deprecated "
|
||||
"and will be removed in v0.13. "
|
||||
"Please use `--mm-encoder-tp-mode data` instead.",
|
||||
scope="local",
|
||||
)
|
||||
|
||||
self.mm_encoder_tp_mode = "data"
|
||||
|
||||
return ModelConfig(
|
||||
model=self.model,
|
||||
hf_config_path=self.hf_config_path,
|
||||
runner=self.runner,
|
||||
convert=self.convert,
|
||||
task=self.task,
|
||||
tokenizer=self.tokenizer,
|
||||
tokenizer_mode=self.tokenizer_mode,
|
||||
trust_remote_code=self.trust_remote_code,
|
||||
|
||||
Reference in New Issue
Block a user