[Deprecation] Remove deprecated task, seed and MM settings (#30397)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-12-11 11:59:39 +08:00
committed by GitHub
parent 5a87d8b9b1
commit 7e24e5d4d6
16 changed files with 25 additions and 276 deletions

View File

@@ -71,7 +71,6 @@ from vllm.config.model import (
LogprobsMode,
ModelDType,
RunnerOption,
TaskOption,
TokenizerMode,
)
from vllm.config.multimodal import MMCacheType, MMEncoderTPMode
@@ -360,7 +359,6 @@ class EngineArgs:
hf_config_path: str | None = ModelConfig.hf_config_path
runner: RunnerOption = ModelConfig.runner
convert: ConvertOption = ModelConfig.convert
task: TaskOption | None = ModelConfig.task
skip_tokenizer_init: bool = ModelConfig.skip_tokenizer_init
enable_prompt_embeds: bool = ModelConfig.enable_prompt_embeds
tokenizer_mode: TokenizerMode | str = ModelConfig.tokenizer_mode
@@ -373,7 +371,7 @@ class EngineArgs:
config_format: str = ModelConfig.config_format
dtype: ModelDType = ModelConfig.dtype
kv_cache_dtype: CacheDType = CacheConfig.cache_dtype
seed: int | None = 0
seed: int = ModelConfig.seed
max_model_len: int | None = ModelConfig.max_model_len
cudagraph_capture_sizes: list[int] | None = (
CompilationConfig.cudagraph_capture_sizes
@@ -462,7 +460,6 @@ class EngineArgs:
MultiModalConfig, "media_io_kwargs"
)
mm_processor_kwargs: dict[str, Any] | None = MultiModalConfig.mm_processor_kwargs
disable_mm_preprocessor_cache: bool = False # DEPRECATED
mm_processor_cache_gb: float = MultiModalConfig.mm_processor_cache_gb
mm_processor_cache_type: MMCacheType | None = (
MultiModalConfig.mm_processor_cache_type
@@ -558,9 +555,6 @@ class EngineArgs:
use_tqdm_on_load: bool = LoadConfig.use_tqdm_on_load
pt_load_map_location: str = LoadConfig.pt_load_map_location
# DEPRECATED
enable_multimodal_encoder_data_parallel: bool = False
logits_processors: list[str | type[LogitsProcessor]] | None = (
ModelConfig.logits_processors
)
@@ -628,7 +622,6 @@ class EngineArgs:
model_group.add_argument("--model", **model_kwargs["model"])
model_group.add_argument("--runner", **model_kwargs["runner"])
model_group.add_argument("--convert", **model_kwargs["convert"])
model_group.add_argument("--task", **model_kwargs["task"], deprecated=True)
model_group.add_argument("--tokenizer", **model_kwargs["tokenizer"])
model_group.add_argument("--tokenizer-mode", **model_kwargs["tokenizer_mode"])
model_group.add_argument(
@@ -882,11 +875,6 @@ class EngineArgs:
parallel_group.add_argument(
"--worker-extension-cls", **parallel_kwargs["worker_extension_cls"]
)
parallel_group.add_argument(
"--enable-multimodal-encoder-data-parallel",
action="store_true",
deprecated=True,
)
# KV cache arguments
cache_kwargs = get_kwargs(CacheConfig)
@@ -960,9 +948,6 @@ class EngineArgs:
multimodal_group.add_argument(
"--mm-processor-cache-gb", **multimodal_kwargs["mm_processor_cache_gb"]
)
multimodal_group.add_argument(
"--disable-mm-preprocessor-cache", action="store_true", deprecated=True
)
multimodal_group.add_argument(
"--mm-processor-cache-type", **multimodal_kwargs["mm_processor_cache_type"]
)
@@ -1192,62 +1177,20 @@ class EngineArgs:
if is_gguf(self.model):
self.quantization = self.load_format = "gguf"
# NOTE(woosuk): In V1, we use separate processes for workers (unless
# VLLM_ENABLE_V1_MULTIPROCESSING=0), so setting a seed here
# doesn't affect the user process.
if self.seed is None:
logger.warning_once(
"`seed=None` is equivalent to `seed=0` in V1 Engine. "
"You will no longer be allowed to pass `None` in v0.13.",
scope="local",
if not envs.VLLM_ENABLE_V1_MULTIPROCESSING:
logger.warning(
"The global random seed is set to %d. Since "
"VLLM_ENABLE_V1_MULTIPROCESSING is set to False, this may "
"affect the random state of the Python process that "
"launched vLLM.",
self.seed,
)
self.seed = 0
if not envs.VLLM_ENABLE_V1_MULTIPROCESSING:
logger.warning(
"The global random seed is set to %d. Since "
"VLLM_ENABLE_V1_MULTIPROCESSING is set to False, this may "
"affect the random state of the Python process that "
"launched vLLM.",
self.seed,
)
if self.disable_mm_preprocessor_cache:
logger.warning_once(
"`--disable-mm-preprocessor-cache` is deprecated "
"and will be removed in v0.13. "
"Please use `--mm-processor-cache-gb 0` instead.",
scope="local",
)
self.mm_processor_cache_gb = 0
elif envs.VLLM_MM_INPUT_CACHE_GIB != 4:
logger.warning_once(
"VLLM_MM_INPUT_CACHE_GIB` is deprecated "
"and will be removed in v0.13. "
"Please use `--mm-processor-cache-gb %d` instead.",
envs.VLLM_MM_INPUT_CACHE_GIB,
scope="local",
)
self.mm_processor_cache_gb = envs.VLLM_MM_INPUT_CACHE_GIB
if self.enable_multimodal_encoder_data_parallel:
logger.warning_once(
"--enable-multimodal-encoder-data-parallel` is deprecated "
"and will be removed in v0.13. "
"Please use `--mm-encoder-tp-mode data` instead.",
scope="local",
)
self.mm_encoder_tp_mode = "data"
return ModelConfig(
model=self.model,
hf_config_path=self.hf_config_path,
runner=self.runner,
convert=self.convert,
task=self.task,
tokenizer=self.tokenizer,
tokenizer_mode=self.tokenizer_mode,
trust_remote_code=self.trust_remote_code,