diff --git a/docs/design/plugin_system.md b/docs/design/plugin_system.md
index d674f7740..e5c9cea17 100644
--- a/docs/design/plugin_system.md
+++ b/docs/design/plugin_system.md
@@ -155,4 +155,4 @@ The interface for the model/module may change during vLLM's development. If you
 - `use_v1` parameter in `Platform.get_attn_backend_cls` is deprecated. It has been removed in v0.13.0.
 - `_Backend` in `vllm.attention` is deprecated. It has been removed in v0.13.0. Please use `vllm.v1.attention.backends.registry.register_backend` to add new attention backend to `AttentionBackendEnum` instead.
 - `seed_everything` platform interface is deprecated. It has been removed in v0.16.0. Please use `vllm.utils.torch_utils.set_random_seed` instead.
-- `prompt` in `Platform.validate_request` is deprecated and will be removed in v0.18.0.
+- `prompt` in `Platform.validate_request` is deprecated. It has been removed in v0.18.0.
diff --git a/tests/transformers_utils/test_config.py b/tests/transformers_utils/test_config.py
index 85680c41e..5a7421b6a 100644
--- a/tests/transformers_utils/test_config.py
+++ b/tests/transformers_utils/test_config.py
@@ -3,7 +3,7 @@
 """
 This test file includes some cases where it is inappropriate to only get
 the `eos_token_id` from the tokenizer as defined by
-`vllm.LLMEngine._get_eos_token_id`.
+`BaseRenderer.get_eos_token_id`.
 """
 
 from vllm.tokenizers import get_tokenizer
diff --git a/vllm/multimodal/processing/processor.py b/vllm/multimodal/processing/processor.py
index 002c48c77..839128fbf 100644
--- a/vllm/multimodal/processing/processor.py
+++ b/vllm/multimodal/processing/processor.py
@@ -986,14 +986,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
         self.dummy_inputs = dummy_inputs
         self.cache = cache
 
-        # TODO: Remove in v0.18
-        if hasattr(self, "_get_data_parser"):
-            raise ValueError(
-                "BaseMultiModalProcessor._get_data_parser has been "
-                "moved to `BaseProcessingInfo.build_data_parser` in v0.16. "
-                "You should override `BaseProcessingInfo.build_data_parser` instead."
-            )
-
         self.data_parser = self.info.get_data_parser()
 
     def __call__(
diff --git a/vllm/plugins/io_processors/__init__.py b/vllm/plugins/io_processors/__init__.py
index 86ebe41b0..c8cb4f185 100644
--- a/vllm/plugins/io_processors/__init__.py
+++ b/vllm/plugins/io_processors/__init__.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import inspect
 import logging
 
 from vllm.config import VllmConfig
@@ -67,16 +66,6 @@ def get_io_processor(
             f"Available plugins: {list(loadable_plugins.keys())}"
         )
 
-    activated_plugin_cls = loadable_plugins[model_plugin]
-    activated_plugin_typ = resolve_obj_by_qualname(activated_plugin_cls)
-
-    # for backward compatibility, the plugin does not have a renderer argument
-    if "renderer" not in inspect.signature(activated_plugin_typ.__init__).parameters:
-        logger.warning(
-            "The renderer argument will be required in v0.18, "
-            "please update your IOProcessor plugin: %s",
-            activated_plugin_cls,
-        )
-        return activated_plugin_typ(vllm_config)
-
-    return activated_plugin_typ(vllm_config, renderer)
+    activated_plugin_cls = resolve_obj_by_qualname(loadable_plugins[model_plugin])
+
+    return activated_plugin_cls(vllm_config, renderer)
diff --git a/vllm/v1/engine/__init__.py b/vllm/v1/engine/__init__.py
index 969b441da..d76948bc2 100644
--- a/vllm/v1/engine/__init__.py
+++ b/vllm/v1/engine/__init__.py
@@ -9,7 +9,6 @@ from typing import Any, Literal
 import msgspec
 import numpy as np
 import torch
-from typing_extensions import deprecated
 
 from vllm.lora.request import LoRARequest
 from vllm.multimodal.inputs import MultiModalFeatureSpec
@@ -110,17 +109,6 @@ class EngineCoreRequest(
         assert self.pooling_params is not None
         return self.pooling_params
 
-    @property
-    @deprecated(
-        "EngineCoreRequest.eos_token_id will be removed in v0.18. "
-        "Please use EngineCoreRequest.sampling_params.eos_token_id instead."
-    )
-    def eos_token_id(self) -> int | None:
-        if self.sampling_params is None:
-            return None
-
-        return self.sampling_params.eos_token_id
-
 
 class EngineCoreEventType(enum.IntEnum):
     """The type of engine core request event."""
diff --git a/vllm/v1/engine/input_processor.py b/vllm/v1/engine/input_processor.py
index fe062bde4..aab560544 100644
--- a/vllm/v1/engine/input_processor.py
+++ b/vllm/v1/engine/input_processor.py
@@ -22,13 +22,13 @@ from vllm.multimodal.inputs import (
     MultiModalFeatureSpec,
 )
 from vllm.multimodal.utils import argsort_mm_positions
+from vllm.platforms import current_platform
 from vllm.pooling_params import PoolingParams
 from vllm.renderers import BaseRenderer, renderer_from_config
 from vllm.sampling_params import SamplingParams
 from vllm.tasks import GENERATION_TASKS, POOLING_TASKS, SupportedTask
 from vllm.tokenizers import TokenizerLike
 from vllm.utils import length_from_prompt_token_ids_or_embeds, random_uuid
-from vllm.utils.func_utils import supports_kw
 from vllm.utils.jsontree import json_iter_leaves
 from vllm.v1.engine import EngineCoreRequest
 
@@ -73,33 +73,6 @@ class InputProcessor:
             mm_registry=mm_registry,
         )
 
-        from vllm.platforms import current_platform
-
-        platform_validate_request = current_platform.validate_request
-        if supports_kw(platform_validate_request, "prompt"):
-            logger.warning_once(
-                "The signature of Platform.validate_request has changed from "
-                "`(cls, prompt, params, processed_inputs) -> None` to "
-                "`(cls, processed_inputs, params) -> None`. The old signature "
-                "will no longer be supported starting from v0.18."
-            )
-
-            orig_validate_request = platform_validate_request
-
-            def compat_validate_request(
-                processed_inputs: ProcessorInputs,
-                params: SamplingParams | PoolingParams,
-            ):
-                return orig_validate_request(
-                    processed_inputs,
-                    params,
-                    processed_inputs,  # type: ignore
-                )  # type: ignore
-
-            platform_validate_request = compat_validate_request
-
-        self._platform_validate_request = platform_validate_request
-
     @property
     def tokenizer(self) -> TokenizerLike | None:
         return self.renderer.tokenizer
@@ -265,7 +238,7 @@ class InputProcessor:
             tokenization_kwargs=tokenization_kwargs,
         )
 
-        self._platform_validate_request(processed_inputs, params)
+        current_platform.validate_request(processed_inputs, params)
 
         encoder_inputs, decoder_inputs = split_enc_dec_inputs(processed_inputs)
         self._validate_model_inputs(encoder_inputs, decoder_inputs)
diff --git a/vllm/v1/request.py b/vllm/v1/request.py
index 85ca90d99..f2ee33b49 100644
--- a/vllm/v1/request.py
+++ b/vllm/v1/request.py
@@ -9,7 +9,6 @@ from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
 
 import torch
-from typing_extensions import deprecated
 
 from vllm.multimodal.inputs import MultiModalFeatureSpec
 from vllm.pooling_params import PoolingParams
@@ -177,17 +176,6 @@ class Request:
         # None entry in the queue means finished.
         self.streaming_queue: deque[StreamingUpdate | None] | None = None
 
-    @property
-    @deprecated(
-        "Request.eos_token_id will be removed in v0.18. "
-        "Please use Request.sampling_params.eos_token_id instead."
-    )
-    def eos_token_id(self) -> int | None:
-        if self.sampling_params is None:
-            return None
-
-        return self.sampling_params.eos_token_id
-
     @classmethod
     def from_engine_core_request(
         cls,
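
Migration sketch (illustrative note, not part of the patch): with the compatibility
shims above removed, out-of-tree code must adopt the new interfaces directly. The
sketch below shows the shapes the patch now requires. The class names
`MyIOProcessor` and `MyPlatform` are hypothetical placeholders; the
`(vllm_config, renderer)` constructor order, the
`validate_request(cls, processed_inputs, params)` signature, and the
`sampling_params.eos_token_id` access are the parts taken from the patch itself.

    # Hypothetical stubs for an out-of-tree plugin and platform; only the
    # signatures are dictated by the patch above.
    from vllm.config import VllmConfig
    from vllm.renderers import BaseRenderer

    class MyIOProcessor:
        # get_io_processor() now always instantiates plugins as
        # plugin_cls(vllm_config, renderer); the inspect-based fallback for
        # renderer-less constructors is gone.
        def __init__(self, vllm_config: VllmConfig, renderer: BaseRenderer):
            self.vllm_config = vllm_config
            self.renderer = renderer

    class MyPlatform:
        # New signature only: the old (cls, prompt, params, processed_inputs)
        # form is no longer wrapped for compatibility at call time.
        @classmethod
        def validate_request(cls, processed_inputs, params) -> None:
            pass

    # With the deprecated Request/EngineCoreRequest properties removed, read
    # the EOS token from the sampling params instead:
    #     eos = request.sampling_params.eos_token_id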