[Renderer] Deprecate code paths for old input processing (#34775)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-02-18 16:35:04 +08:00
parent 1faa8cb73c
commit a766b30349
6 changed files with 70 additions and 51 deletions
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -27,7 +27,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
 from vllm.outputs import STREAM_FINISHED, PoolingRequestOutput, RequestOutput
 from vllm.plugins.io_processors import get_io_processor
 from vllm.pooling_params import PoolingParams
-from vllm.renderers import merge_kwargs, renderer_from_config
+from vllm.renderers import renderer_from_config
 from vllm.renderers.inputs.preprocess import extract_prompt_components
 from vllm.sampling_params import RequestOutputKind, SamplingParams
 from vllm.tasks import SupportedTask
@@ -319,21 +319,6 @@ class AsyncLLM(EngineClient):
                "prompt logprobs"
            )

-        if params.truncate_prompt_tokens is not None:
-            params_type = type(params).__name__
-            warnings.warn(
-                f"The `truncate_prompt_tokens` parameter in `{params_type}` "
-                "is deprecated and will be removed in v0.16. "
-                "Please pass it via `tokenization_kwargs` instead.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
-
-            tokenization_kwargs = merge_kwargs(
-                tokenization_kwargs,
-                dict(truncate_prompt_tokens=params.truncate_prompt_tokens),
-            )
-
        if isinstance(prompt, AsyncGenerator):
            if reasoning_ended is not None:
                raise NotImplementedError
@@ -353,6 +338,12 @@ class AsyncLLM(EngineClient):

        # Convert Input --> Request.
        if isinstance(prompt, EngineCoreRequest):
+            logger.warning_once(
+                "Passing EngineCoreRequest to AsyncLLM.generate() and .add_requests() "
+                "is deprecated and will be removed in v0.18. You should instead pass "
+                "the outputs of Renderer.render_cmpl() or Renderer.render_chat()."
+            )
+
            request = prompt
            if request_id != request.request_id:
                logger.warning_once(
--- a/vllm/v1/engine/input_processor.py
+++ b/vllm/v1/engine/input_processor.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

 import time
+import warnings
 from collections.abc import Mapping
 from typing import Any, Literal

@@ -28,6 +29,7 @@ from vllm.sampling_params import SamplingParams
 from vllm.tasks import POOLING_TASKS, SupportedTask
 from vllm.tokenizers import TokenizerLike
 from vllm.utils import length_from_prompt_token_ids_or_embeds, random_uuid
+from vllm.utils.func_utils import supports_kw
 from vllm.utils.jsontree import json_iter_leaves
 from vllm.v1.engine import EngineCoreRequest

@@ -72,6 +74,33 @@ class InputProcessor:
            mm_registry=mm_registry,
        )

+        from vllm.platforms import current_platform
+
+        platform_validate_request = current_platform.validate_request
+        if supports_kw(platform_validate_request, "prompt"):
+            logger.warning_once(
+                "The signature of Platform.validate_request has changed from "
+                "`(cls, prompt, params, processed_inputs) -> None` to "
+                "`(cls, processed_inputs, params) -> None`. The old signature "
+                "will no longer be supported starting from v0.18."
+            )
+
+            orig_validate_request = platform_validate_request
+
+            def compat_validate_request(
+                processed_inputs: ProcessorInputs,
+                params: SamplingParams | PoolingParams,
+            ):
+                return orig_validate_request(
+                    processed_inputs,
+                    params,
+                    processed_inputs,  # type: ignore
+                )  # type: ignore
+
+            platform_validate_request = compat_validate_request
+
+        self._platform_validate_request = platform_validate_request
+
    @property
    def tokenizer(self) -> TokenizerLike | None:
        return self.renderer.tokenizer
@@ -87,6 +116,16 @@ class InputProcessor:
        supported_tasks: tuple[SupportedTask, ...] | None,
    ):
        """Raise `ValueError` if SamplingParams or PoolingParams is not valid."""
+        if params.truncate_prompt_tokens is not None:
+            params_type = type(params).__name__
+            warnings.warn(
+                f"The `truncate_prompt_tokens` parameter in `{params_type}` "
+                "is deprecated and will be removed in v0.17. "
+                "Please pass it via `tokenization_kwargs` instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
        if isinstance(params, SamplingParams):
            params.verify(
                self.model_config,
@@ -211,11 +250,24 @@ class InputProcessor:
            )

        if isinstance(prompt, dict) and "type" in prompt:
+            if tokenization_kwargs:
+                logger.warning_once(
+                    "Passing tokenization_kwargs to InputProcessor is deprecated "
+                    "and will be removed in v0.18. You should instead pass "
+                    "them to Renderer.render_cmpl() or Renderer.render_chat()."
+                )
+
            if arrival_time is None:
                arrival_time = prompt.get("arrival_time", time.time())  # type: ignore[assignment]

            processed_inputs: ProcessorInputs = prompt  # type: ignore[assignment]
        else:
+            logger.warning_once(
+                "Passing raw prompts to InputProcessor is deprecated "
+                "and will be removed in v0.18. You should instead pass "
+                "the outputs of Renderer.render_cmpl() or Renderer.render_chat()."
+            )
+
            if arrival_time is None:
                arrival_time = time.time()

@@ -224,13 +276,7 @@ class InputProcessor:
                tokenization_kwargs=tokenization_kwargs,
            )

-        from vllm.platforms import current_platform
-
-        current_platform.validate_request(
-            prompt=prompt,
-            params=params,
-            processed_inputs=processed_inputs,
-        )
+        self._platform_validate_request(processed_inputs, params)

        encoder_inputs, decoder_inputs = split_enc_dec_inputs(processed_inputs)
        self._validate_model_inputs(encoder_inputs, decoder_inputs)
--- a/vllm/v1/engine/llm_engine.py
+++ b/vllm/v1/engine/llm_engine.py
@@ -234,10 +234,16 @@ class LLMEngine:

        # Process raw inputs into the request.
        if isinstance(prompt, EngineCoreRequest):
+            logger.warning_once(
+                "Passing EngineCoreRequest to LLMEngine.generate() and .add_requests() "
+                "is deprecated and will be removed in v0.18. You should instead pass "
+                "the outputs of Renderer.render_cmpl() or Renderer.render_chat()."
+            )
+
            request = prompt
            if request_id != request.request_id:
                logger.warning_once(
-                    "AsyncLLM.add_request() was passed a request_id parameter that "
+                    "LLMEngine.add_request() was passed a request_id parameter that "
                    "does not match the EngineCoreRequest.request_id attribute. The "
                    "latter will be used, and the former will be ignored."
                )