diff --git a/tests/entrypoints/pooling/embed/test_online.py b/tests/entrypoints/pooling/embed/test_online.py
index 89341670c..adec62334 100644
--- a/tests/entrypoints/pooling/embed/test_online.py
+++ b/tests/entrypoints/pooling/embed/test_online.py
@@ -683,13 +683,13 @@ async def test_params_not_supported(
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
-async def test_normalize(server: RemoteOpenAIServer, model_name: str):
-    async def get_outputs(normalize):
+async def test_use_activation(server: RemoteOpenAIServer, model_name: str):
+    async def get_outputs(use_activation):
         request_args = {
             "model": MODEL_NAME,
             "input": input_text,
             "encoding_format": "float",
-            "normalize": normalize,
+            "use_activation": use_activation,
         }
 
         response = requests.post(server.url_for("v1/embeddings"), json=request_args)
@@ -697,9 +697,9 @@ async def test_normalize(server: RemoteOpenAIServer, model_name: str):
 
         return torch.tensor([x["embedding"] for x in outputs["data"]])
 
-    default = await get_outputs(normalize=None)
-    w_normal = await get_outputs(normalize=True)
-    wo_normal = await get_outputs(normalize=False)
+    default = await get_outputs(use_activation=None)
+    w_normal = await get_outputs(use_activation=True)
+    wo_normal = await get_outputs(use_activation=False)
 
     assert torch.allclose(default, w_normal, atol=1e-2), "Default should use normal."
     assert not torch.allclose(w_normal, wo_normal, atol=1e-2), (
diff --git a/vllm/entrypoints/grpc_server.py b/vllm/entrypoints/grpc_server.py
index 1fc3354a4..ec8f4804b 100755
--- a/vllm/entrypoints/grpc_server.py
+++ b/vllm/entrypoints/grpc_server.py
@@ -101,11 +101,15 @@ class VllmEngineServicer(vllm_engine_pb2_grpc.VllmEngineServicer):
         sampling_params = self._sampling_params_from_proto(
             request.sampling_params, stream=request.stream
         )
+        tokenization_kwargs = self._tokenization_kwargs_from_proto(
+            request.sampling_params
+        )
 
         async for output in self.async_llm.generate(
             prompt=prompt,
             sampling_params=sampling_params,
             request_id=request_id,
+            tokenization_kwargs=tokenization_kwargs,
         ):
             # Convert vLLM output to protobuf
             # For streaming, always send chunks
@@ -308,9 +312,6 @@ class VllmEngineServicer(vllm_engine_pb2_grpc.VllmEngineServicer):
             seed=params.seed if params.HasField("seed") else None,
             include_stop_str_in_output=params.include_stop_str_in_output,
             logit_bias=dict(params.logit_bias) if params.logit_bias else None,
-            truncate_prompt_tokens=params.truncate_prompt_tokens
-            if params.HasField("truncate_prompt_tokens")
-            else None,
             structured_outputs=structured_outputs,
             # detokenize must be True if stop strings are used
             detokenize=bool(stop),
@@ -319,6 +320,14 @@ class VllmEngineServicer(vllm_engine_pb2_grpc.VllmEngineServicer):
             else RequestOutputKind.FINAL_ONLY,
         )
 
+    @staticmethod
+    def _tokenization_kwargs_from_proto(
+        params: vllm_engine_pb2.SamplingParams,
+    ) -> dict[str, int] | None:
+        if params.HasField("truncate_prompt_tokens"):
+            return {"truncate_prompt_tokens": params.truncate_prompt_tokens}
+        return None
+
     @staticmethod
     def _chunk_response(output: RequestOutput) -> vllm_engine_pb2.GenerateResponse:
         """
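Taken together, the gRPC changes above move prompt truncation out of `SamplingParams` and into a separate `tokenization_kwargs` dict handed to `AsyncLLM.generate()`. A minimal sketch of the new call shape; `async_llm`, `request`, `prompt`, `sampling_params`, and `request_id` are assumed to exist as in `grpc_server.py` (setup not shown in this diff):

```python
# Sketch of the plumbing added above, not a verbatim excerpt.
tokenization_kwargs = None
if request.sampling_params.HasField("truncate_prompt_tokens"):
    tokenization_kwargs = {
        "truncate_prompt_tokens": request.sampling_params.truncate_prompt_tokens
    }

async for output in async_llm.generate(
    prompt=prompt,
    sampling_params=sampling_params,  # no longer carries truncate_prompt_tokens
    request_id=request_id,
    tokenization_kwargs=tokenization_kwargs,
):
    ...  # convert each output chunk to protobuf, as before
```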
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index ee78d4d48..b3260f914 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import itertools
-import warnings
 from collections.abc import Callable, Iterable, Sequence
 from typing import TYPE_CHECKING, Any
 
@@ -1030,7 +1029,6 @@ class LLM:
         prompts: PromptType | Sequence[PromptType] | DataPrompt,
         pooling_params: PoolingParams | Sequence[PoolingParams] | None = None,
         *,
-        truncate_prompt_tokens: int | None = None,
         use_tqdm: bool | Callable[..., tqdm] = True,
         lora_request: list[LoRARequest] | LoRARequest | None = None,
         pooling_task: PoolingTask | None = None,
@@ -1088,20 +1086,6 @@ class LLM:
                 "pooling model."
             )
 
-        if truncate_prompt_tokens is not None:
-            warnings.warn(
-                "The `truncate_prompt_tokens` parameter in `LLM.encode()` "
-                "is deprecated and will be removed in v0.16. "
-                "Please pass it via `tokenization_kwargs` instead.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
-
-            tokenization_kwargs = merge_kwargs(
-                tokenization_kwargs,
-                dict(truncate_prompt_tokens=truncate_prompt_tokens),
-            )
-
         if use_io_processor := (isinstance(prompts, dict) and "data" in prompts):
             if self.io_processor is None:
                 raise ValueError(
@@ -1185,7 +1169,6 @@ class LLM:
         self,
         prompts: PromptType | Sequence[PromptType],
         *,
-        truncate_prompt_tokens: int | None = None,
         use_tqdm: bool | Callable[..., tqdm] = True,
         pooling_params: PoolingParams | Sequence[PoolingParams] | None = None,
         lora_request: list[LoRARequest] | LoRARequest | None = None,
@@ -1221,12 +1204,6 @@ class LLM:
                 "Try converting the model using `--convert embed`."
             )
 
-        if truncate_prompt_tokens is not None:
-            tokenization_kwargs = merge_kwargs(
-                tokenization_kwargs,
-                dict(truncate_prompt_tokens=truncate_prompt_tokens),
-            )
-
         items = self.encode(
             prompts,
             use_tqdm=use_tqdm,
@@ -1294,7 +1271,6 @@ class LLM:
         /,
         *,
         pooling_params: PoolingParams | Sequence[PoolingParams] | None = None,
-        truncate_prompt_tokens: int | None = None,
         use_tqdm: bool | Callable[..., tqdm] = True,
         lora_request: list[LoRARequest] | LoRARequest | None = None,
         tokenization_kwargs: dict[str, Any] | None = None,
@@ -1319,13 +1295,11 @@ class LLM:
             A list of `PoolingRequestOutput` objects containing the pooled
             hidden states in the same order as the input prompts.
         """
-
         return self.encode(
             prompts,
             use_tqdm=use_tqdm,
             lora_request=lora_request,
             pooling_params=pooling_params,
-            truncate_prompt_tokens=truncate_prompt_tokens,
             pooling_task="token_classify",
             tokenization_kwargs=tokenization_kwargs,
         )
@@ -1771,23 +1745,15 @@ class LLM:
         seq_prompts = prompt_to_seq(prompts)
         seq_params = self._params_to_seq(params, len(seq_prompts))
         seq_lora_requests = self._lora_request_to_seq(lora_request, len(seq_prompts))
-        seq_tok_kwargs = [
-            merge_kwargs(
-                tokenization_kwargs,
-                dict(truncate_prompt_tokens=param.truncate_prompt_tokens),
-            )
-            for param in seq_params
-        ]
         seq_priority = self._priority_to_seq(priority, len(prompts))
 
         return self._render_and_add_requests(
             prompts=(
-                self._preprocess_cmpl_one(prompt, tok_kwargs)
-                for prompt, tok_kwargs in zip(
-                    maybe_tqdm(
-                        seq_prompts, use_tqdm=use_tqdm, desc="Rendering prompts"
-                    ),
-                    seq_tok_kwargs,
+                self._preprocess_cmpl_one(prompt, tokenization_kwargs)
+                for prompt in maybe_tqdm(
+                    seq_prompts,
+                    use_tqdm=use_tqdm,
+                    desc="Rendering prompts",
                 )
             ),
             params=seq_params,
@@ -1841,13 +1807,6 @@ class LLM:
         seq_convs = conversation_to_seq(messages)
         seq_params = self._params_to_seq(params, len(seq_convs))
         seq_lora_requests = self._lora_request_to_seq(lora_request, len(seq_convs))
-        seq_tok_kwargs = [
-            merge_kwargs(
-                tokenization_kwargs,
-                dict(truncate_prompt_tokens=param.truncate_prompt_tokens),
-            )
-            for param in seq_params
-        ]
 
         return self._render_and_run_requests(
             prompts=(
@@ -1859,16 +1818,13 @@ class LLM:
                     add_generation_prompt=add_generation_prompt,
                     continue_final_message=continue_final_message,
                     tools=tools,
-                    tokenization_kwargs=tok_kwargs,
+                    tokenization_kwargs=tokenization_kwargs,
                     mm_processor_kwargs=mm_processor_kwargs,
                 )
-                for conversation, tok_kwargs in zip(
-                    maybe_tqdm(
-                        seq_convs,
-                        use_tqdm=use_tqdm,
-                        desc="Rendering conversations",
-                    ),
-                    seq_tok_kwargs,
+                for conversation in maybe_tqdm(
+                    seq_convs,
+                    use_tqdm=use_tqdm,
+                    desc="Rendering conversations",
                 )
             ),
             params=seq_params,
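For users of the Python API, the removed `truncate_prompt_tokens=` keyword on `LLM.encode()` (and the pooling helpers built on it) maps one-to-one onto `tokenization_kwargs`. A minimal migration sketch; the model name is a placeholder:

```python
from vllm import LLM

llm = LLM(model="my-embedding-model")  # placeholder; any pooling model

# Before this diff (deprecated, previously warned for removal in v0.16):
#   outputs = llm.encode(prompts, truncate_prompt_tokens=128)

# After this diff:
outputs = llm.encode(
    ["a very long document ..."],
    tokenization_kwargs={"truncate_prompt_tokens": 128},
)
```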
""" - return self.encode( prompts, use_tqdm=use_tqdm, lora_request=lora_request, pooling_params=pooling_params, - truncate_prompt_tokens=truncate_prompt_tokens, pooling_task="token_classify", tokenization_kwargs=tokenization_kwargs, ) @@ -1771,23 +1745,15 @@ class LLM: seq_prompts = prompt_to_seq(prompts) seq_params = self._params_to_seq(params, len(seq_prompts)) seq_lora_requests = self._lora_request_to_seq(lora_request, len(seq_prompts)) - seq_tok_kwargs = [ - merge_kwargs( - tokenization_kwargs, - dict(truncate_prompt_tokens=param.truncate_prompt_tokens), - ) - for param in seq_params - ] seq_priority = self._priority_to_seq(priority, len(prompts)) return self._render_and_add_requests( prompts=( - self._preprocess_cmpl_one(prompt, tok_kwargs) - for prompt, tok_kwargs in zip( - maybe_tqdm( - seq_prompts, use_tqdm=use_tqdm, desc="Rendering prompts" - ), - seq_tok_kwargs, + self._preprocess_cmpl_one(prompt, tokenization_kwargs) + for prompt in maybe_tqdm( + seq_prompts, + use_tqdm=use_tqdm, + desc="Rendering prompts", ) ), params=seq_params, @@ -1841,13 +1807,6 @@ class LLM: seq_convs = conversation_to_seq(messages) seq_params = self._params_to_seq(params, len(seq_convs)) seq_lora_requests = self._lora_request_to_seq(lora_request, len(seq_convs)) - seq_tok_kwargs = [ - merge_kwargs( - tokenization_kwargs, - dict(truncate_prompt_tokens=param.truncate_prompt_tokens), - ) - for param in seq_params - ] return self._render_and_run_requests( prompts=( @@ -1859,16 +1818,13 @@ class LLM: add_generation_prompt=add_generation_prompt, continue_final_message=continue_final_message, tools=tools, - tokenization_kwargs=tok_kwargs, + tokenization_kwargs=tokenization_kwargs, mm_processor_kwargs=mm_processor_kwargs, ) - for conversation, tok_kwargs in zip( - maybe_tqdm( - seq_convs, - use_tqdm=use_tqdm, - desc="Rendering conversations", - ), - seq_tok_kwargs, + for conversation in maybe_tqdm( + seq_convs, + use_tqdm=use_tqdm, + desc="Rendering conversations", ) ), params=seq_params, diff --git a/vllm/entrypoints/openai/chat_completion/protocol.py b/vllm/entrypoints/openai/chat_completion/protocol.py index 12bbc44a0..edba28a59 100644 --- a/vllm/entrypoints/openai/chat_completion/protocol.py +++ b/vllm/entrypoints/openai/chat_completion/protocol.py @@ -490,7 +490,6 @@ class ChatCompletionRequest(OpenAIBaseModel): skip_special_tokens=self.skip_special_tokens, spaces_between_special_tokens=self.spaces_between_special_tokens, include_stop_str_in_output=self.include_stop_str_in_output, - truncate_prompt_tokens=self.truncate_prompt_tokens, output_kind=RequestOutputKind.DELTA if self.stream else RequestOutputKind.FINAL_ONLY, diff --git a/vllm/entrypoints/openai/completion/protocol.py b/vllm/entrypoints/openai/completion/protocol.py index 02e6e0d03..222640439 100644 --- a/vllm/entrypoints/openai/completion/protocol.py +++ b/vllm/entrypoints/openai/completion/protocol.py @@ -302,7 +302,6 @@ class CompletionRequest(OpenAIBaseModel): skip_special_tokens=self.skip_special_tokens, spaces_between_special_tokens=self.spaces_between_special_tokens, include_stop_str_in_output=self.include_stop_str_in_output, - truncate_prompt_tokens=self.truncate_prompt_tokens, output_kind=RequestOutputKind.DELTA if self.stream else RequestOutputKind.FINAL_ONLY, diff --git a/vllm/entrypoints/openai/translations/__init__.py b/vllm/entrypoints/openai/translations/__init__.py deleted file mode 100644 index cf210d505..000000000 --- a/vllm/entrypoints/openai/translations/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: 
diff --git a/vllm/entrypoints/openai/translations/api_router.py b/vllm/entrypoints/openai/translations/api_router.py
deleted file mode 100644
index 4a43bf8b9..000000000
--- a/vllm/entrypoints/openai/translations/api_router.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import warnings
-
-warnings.warn(
-    "'vllm.entrypoints.openai.translations.api_router' has been moved to "
-    "'vllm.entrypoints.openai.speech_to_text.api_router'. Please update your "
-    "imports. This backward-compatible alias will be removed in version 0.17+.",
-    DeprecationWarning,
-    stacklevel=2,
-)
-
-from vllm.entrypoints.openai.speech_to_text.api_router import *  # noqa: F401,F403,E402
diff --git a/vllm/entrypoints/openai/translations/protocol.py b/vllm/entrypoints/openai/translations/protocol.py
deleted file mode 100644
index c8ec156d9..000000000
--- a/vllm/entrypoints/openai/translations/protocol.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import warnings
-
-warnings.warn(
-    "'vllm.entrypoints.openai.translations.protocol' has been moved to "
-    "'vllm.entrypoints.openai.speech_to_text.protocol'. Please update your "
-    "imports. This backward-compatible alias will be removed in version 0.17+.",
-    DeprecationWarning,
-    stacklevel=2,
-)
-
-from vllm.entrypoints.openai.speech_to_text.protocol import *  # noqa: F401,F403,E402
diff --git a/vllm/entrypoints/openai/translations/serving.py b/vllm/entrypoints/openai/translations/serving.py
deleted file mode 100644
index 1749d6155..000000000
--- a/vllm/entrypoints/openai/translations/serving.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import warnings
-
-warnings.warn(
-    "'vllm.entrypoints.openai.translations.serving' has been moved to "
-    "'vllm.entrypoints.openai.speech_to_text.serving'. Please update your "
-    "imports. This backward-compatible alias will be removed in version 0.17+.",
-    DeprecationWarning,
-    stacklevel=2,
-)
-
-from vllm.entrypoints.openai.speech_to_text.serving import *  # noqa: F401,F403,E402
diff --git a/vllm/entrypoints/openai/translations/speech_to_text.py b/vllm/entrypoints/openai/translations/speech_to_text.py
deleted file mode 100644
index eb26c6a83..000000000
--- a/vllm/entrypoints/openai/translations/speech_to_text.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import warnings
-
-warnings.warn(
-    "'vllm.entrypoints.openai.translations.speech_to_text' has been moved to "
-    "'vllm.entrypoints.openai.speech_to_text.speech_to_text'. Please update "
-    "your imports. This backward-compatible alias will be removed in version "
-    "0.17+.",
-    DeprecationWarning,
-    stacklevel=2,
-)
-
-from vllm.entrypoints.openai.speech_to_text.speech_to_text import *  # noqa: F401,F403,E402
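With the last of these alias modules deleted, the old `translations` import paths now fail with `ModuleNotFoundError` instead of emitting a `DeprecationWarning`. The migration is a path change only, e.g.:

```python
# Before (worked with a DeprecationWarning while the aliases existed):
#   from vllm.entrypoints.openai.translations import protocol

# After this diff:
from vllm.entrypoints.openai.speech_to_text import protocol
```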
diff --git a/vllm/entrypoints/pooling/base/protocol.py b/vllm/entrypoints/pooling/base/protocol.py
index 86dc12cbd..53945108d 100644
--- a/vllm/entrypoints/pooling/base/protocol.py
+++ b/vllm/entrypoints/pooling/base/protocol.py
@@ -190,10 +190,6 @@ class EmbedRequestMixin(EncodingRequestMixin):
         description="Whether to use activation for the pooler outputs. "
         "`None` uses the pooler's default, which is `True` in most cases.",
     )
-    normalize: bool | None = Field(
-        default=None,
-        description="Deprecated; please pass `use_activation` instead",
-    )
     # --8<-- [end:embed-extra-params]
diff --git a/vllm/entrypoints/pooling/classify/protocol.py b/vllm/entrypoints/pooling/classify/protocol.py
index 3c4bbd8c2..bfc38ebef 100644
--- a/vllm/entrypoints/pooling/classify/protocol.py
+++ b/vllm/entrypoints/pooling/classify/protocol.py
@@ -40,7 +40,6 @@ class ClassificationCompletionRequest(
     def to_pooling_params(self):
         return PoolingParams(
             task="classify",
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
             use_activation=self.use_activation,
         )
 
@@ -63,7 +62,6 @@ class ClassificationChatRequest(
     def to_pooling_params(self):
         return PoolingParams(
             task="classify",
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
             use_activation=self.use_activation,
         )
diff --git a/vllm/entrypoints/pooling/embed/protocol.py b/vllm/entrypoints/pooling/embed/protocol.py
index 4f83105f2..4b47c6522 100644
--- a/vllm/entrypoints/pooling/embed/protocol.py
+++ b/vllm/entrypoints/pooling/embed/protocol.py
@@ -14,12 +14,9 @@ from vllm.entrypoints.pooling.base.protocol import (
     EmbedRequestMixin,
     PoolingBasicRequestMixin,
 )
-from vllm.logger import init_logger
 from vllm.renderers import TokenizeParams
 from vllm.utils import random_uuid
 
-logger = init_logger(__name__)
-
 
 def _get_max_total_output_tokens(
     model_config: ModelConfig,
@@ -60,18 +57,10 @@ class EmbeddingCompletionRequest(
     )
 
     def to_pooling_params(self):
-        if self.normalize is not None:
-            logger.warning_once(
-                "`normalize` is deprecated and will be removed in v0.17. "
-                "Please pass `use_activation` instead."
-            )
-            self.use_activation = self.normalize
-
         return PoolingParams(
             task="embed",
             dimensions=self.dimensions,
             use_activation=self.use_activation,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
         )
 
 
@@ -97,18 +86,10 @@ class EmbeddingChatRequest(
     )
 
     def to_pooling_params(self):
-        if self.normalize is not None:
-            logger.warning_once(
-                "`normalize` is deprecated and will be removed in v0.17. "
-                "Please pass `use_activation` instead."
-            )
-            self.use_activation = self.normalize
-
         return PoolingParams(
             task="embed",
             dimensions=self.dimensions,
             use_activation=self.use_activation,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
         )
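On the HTTP side, the deprecated `normalize` field is gone from embedding requests; clients send `use_activation` instead, exactly as the updated test at the top of this diff does. A hedged client sketch (server URL and model name are placeholders):

```python
import requests

resp = requests.post(
    "http://localhost:8000/v1/embeddings",  # assumed local vLLM server
    json={
        "model": "my-embedding-model",  # placeholder model name
        "input": "What is the capital of France?",
        "encoding_format": "float",
        # "normalize": True,  # field removed by this diff
        "use_activation": True,  # omitted/None -> pooler default (usually True)
    },
)
resp.raise_for_status()
embedding = resp.json()["data"][0]["embedding"]
```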
diff --git a/vllm/entrypoints/pooling/pooling/protocol.py b/vllm/entrypoints/pooling/pooling/protocol.py
index a8c1c59ff..b99f98959 100644
--- a/vllm/entrypoints/pooling/pooling/protocol.py
+++ b/vllm/entrypoints/pooling/pooling/protocol.py
@@ -16,13 +16,10 @@ from vllm.entrypoints.pooling.base.protocol import (
     EncodingRequestMixin,
     PoolingBasicRequestMixin,
 )
-from vllm.logger import init_logger
 from vllm.renderers import TokenizeParams
 from vllm.tasks import PoolingTask
 from vllm.utils import random_uuid
 
-logger = init_logger(__name__)
-
 
 class PoolingCompletionRequest(
     PoolingBasicRequestMixin,
@@ -45,16 +42,8 @@ class PoolingCompletionRequest(
     )
 
     def to_pooling_params(self):
-        if self.normalize is not None:
-            logger.warning_once(
-                "`normalize` is deprecated and will be removed in v0.17. "
-                "Please pass `use_activation` instead."
-            )
-            self.use_activation = self.normalize
-
         return PoolingParams(
             task=self.task,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
             use_activation=self.use_activation,
             dimensions=self.dimensions,
         )
@@ -78,16 +67,8 @@ class PoolingChatRequest(
     )
 
     def to_pooling_params(self):
-        if self.normalize is not None:
-            logger.warning_once(
-                "`normalize` is deprecated and will be removed in v0.17. "
-                "Please pass `use_activation` instead."
-            )
-            self.use_activation = self.normalize
-
         return PoolingParams(
             task=self.task,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
             use_activation=self.use_activation,
             dimensions=self.dimensions,
         )
diff --git a/vllm/entrypoints/pooling/score/protocol.py b/vllm/entrypoints/pooling/score/protocol.py
index a85ed5d70..643eeed36 100644
--- a/vllm/entrypoints/pooling/score/protocol.py
+++ b/vllm/entrypoints/pooling/score/protocol.py
@@ -37,7 +37,6 @@ class ScoreRequestMixin(PoolingBasicRequestMixin, ClassifyRequestMixin):
     def to_pooling_params(self, task: PoolingTask = "score"):
         return PoolingParams(
             task=task,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
             use_activation=self.use_activation,
         )
 
@@ -113,7 +112,6 @@ class RerankRequest(PoolingBasicRequestMixin, ClassifyRequestMixin):
     def to_pooling_params(self, task: PoolingTask = "score"):
         return PoolingParams(
             task=task,
-            truncate_prompt_tokens=self.truncate_prompt_tokens,
             use_activation=self.use_activation,
         )
diff --git a/vllm/model_executor/layers/mamba/mamba_utils.py b/vllm/model_executor/layers/mamba/mamba_utils.py
index fc8912f8c..1f6751f6c 100644
--- a/vllm/model_executor/layers/mamba/mamba_utils.py
+++ b/vllm/model_executor/layers/mamba/mamba_utils.py
@@ -289,9 +289,6 @@ def get_temporal_copy_spec(
     )
 
 
-get_full_copy_spec = get_temporal_copy_spec
-
-
 class MambaStateCopyFuncCalculator:
     @classmethod
     def linear_attention_state_copy_func(cls):
diff --git a/vllm/model_executor/models/ovis2_5.py b/vllm/model_executor/models/ovis2_5.py
index 2d9385c57..57559ba99 100644
--- a/vllm/model_executor/models/ovis2_5.py
+++ b/vllm/model_executor/models/ovis2_5.py
@@ -43,12 +43,9 @@ from vllm.utils.tensor_schema import TensorSchema, TensorShape
 
 from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
 
 IMAGE_TOKEN = "<image>"
-IMAGE_PLACEHOLDER_ID = 151669
 VIDEO_TOKEN = "