[Deprecation] Remove deprecated items related to pooling (#33477)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-02-01 00:44:40 +08:00
parent 27cb2f678f
commit 92924b2ddd
8 changed files with 52 additions and 105 deletions
--- a/docs/models/pooling_models.md
+++ b/docs/models/pooling_models.md
@@ -352,15 +352,6 @@ We have split the `encode` task into two more specific token-wise tasks: `token_
 - `token_embed` is the same as `embed`, using normalization as the activation.
 - `token_classify` is the same as `classify`, by default using softmax as the activation.

-### Remove softmax from PoolingParams
-
-We are going to remove `softmax` and `activation` from `PoolingParams` in v0.15. Instead, use `use_activation`, since we allow `classify` and `token_classify` to use any activation function.
-
-### as_reward_model
-
-!!! warning
-    We are going to remove `--convert reward` in v0.15, use `--convert embed` instead.
-
 Pooling models now default support all pooling, you can use it without any settings.

 - Extracting hidden states prefers using `token_embed` task.
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -75,7 +75,7 @@ else:
 logger = init_logger(__name__)

 RunnerOption = Literal["auto", RunnerType]
-ConvertType = Literal["none", "embed", "classify", "reward", "mm_encoder_only"]
+ConvertType = Literal["none", "embed", "classify"]
 ConvertOption = Literal["auto", ConvertType]
 TokenizerMode = Literal["auto", "hf", "slow", "mistral", "deepseek_v32"]
 ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"]
@@ -499,15 +499,6 @@ class ModelConfig:
        )
        self.model_arch_config = self.get_model_arch_config()

-        if self.convert == "mm_encoder_only":
-            logger.warning_once(
-                "`--convert mm_encoder_only` is deprecated and "
-                "will be removed in v0.15. "
-                "Please use --mm-encoder-only` instead."
-            )
-            mm_encoder_only = True
-            self.convert = "none"
-
        architectures = self.architectures
        registry = self.registry
        is_generative_model = registry.is_text_generation_model(architectures, self)
@@ -855,13 +846,6 @@ class ModelConfig:
        runner_type: RunnerType,
        convert: ConvertOption,
    ) -> ConvertType:
-        if convert == "reward":
-            logger.warning(
-                "`--convert reward` is deprecated and will be removed in v0.15. "
-                "Please use `--convert embed` instead."
-            )
-            return "embed"
-
        if convert != "auto":
            return convert

--- a/vllm/config/pooler.py
+++ b/vllm/config/pooler.py
@@ -45,11 +45,13 @@ class PoolerConfig:
    The pooling method used for tokenwise pooling.
    """

-    ## for embeddings models
-    normalize: bool | None = None
+    use_activation: bool | None = None
    """
-    DEPRECATED: please use `use_activation` instead.
+    Whether to apply activation function to the pooler outputs.
+    `None` uses the pooler's default, which is `True` in most cases.
    """
+
+    ## for embedding models
    dimensions: int | None = None
    """
    Reduce the dimensions of embeddings if model
@@ -73,19 +75,6 @@ class PoolerConfig:
    """

    ## for classification models
-    softmax: float | None = None
-    """
-    DEPRECATED: please use `use_activation` instead.
-    """
-    activation: float | None = None
-    """
-    DEPRECATED: please use `use_activation` instead.
-    """
-    use_activation: bool | None = None
-    """
-    Whether to apply activation function to the classification outputs.
-    Defaults to True.
-    """
    logit_bias: float | None = None
    """
    If provided, apply classification logit biases. Defaults to None.
@@ -105,10 +94,7 @@ class PoolerConfig:
    `math-shepherd-mistral-7b-prm` model.
    """

-    def __post_init__(self):
-        # raise deprecated warning for softmax and activation
-        self.use_activation = get_use_activation(self)
-
+    def __post_init__(self) -> None:
        if pooling_type := self.pooling_type:
            if self.seq_pooling_type is not None:
                raise ValueError(
@@ -161,28 +147,3 @@ class PoolerConfig:
        factors: list[Any] = []
        hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest()
        return hash_str
-
-
-def get_use_activation(o: object):
-    if (normalize := getattr(o, "normalize", None)) is not None:
-        logger.warning_once(
-            "`normalize` is deprecated and will be removed in v0.15. "
-            "Please use `use_activation` instead."
-        )
-        return normalize
-
-    if (softmax := getattr(o, "softmax", None)) is not None:
-        logger.warning_once(
-            "`softmax` is deprecated and will be removed in v0.15. "
-            "Please use `use_activation` instead."
-        )
-        return softmax
-
-    if (activation := getattr(o, "activation", None)) is not None:
-        logger.warning_once(
-            "`activation` is deprecated and will be removed in v0.15. "
-            "Please use `use_activation` instead."
-        )
-        return activation
-
-    return getattr(o, "use_activation", None)
--- a/vllm/entrypoints/pooling/base/protocol.py
+++ b/vllm/entrypoints/pooling/base/protocol.py
@@ -7,16 +7,18 @@ from typing import Annotated, Any
 from pydantic import Field, model_validator

 from vllm import PoolingParams
-from vllm.config.pooler import get_use_activation
 from vllm.entrypoints.chat_utils import (
    ChatCompletionMessageParam,
    ChatTemplateContentFormatOption,
 )
 from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel
+from vllm.logger import init_logger
 from vllm.renderers import ChatParams, merge_kwargs
 from vllm.utils import random_uuid
 from vllm.utils.serial_utils import EmbedDType, EncodingFormat, Endianness

+logger = init_logger(__name__)
+

 class PoolingBasicRequestMixin(OpenAIBaseModel):
    # --8<-- [start:pooling-common-params]
@@ -172,39 +174,43 @@ class EmbedRequestMixin(EncodingRequestMixin):
    # --8<-- [end:embed-params]

    # --8<-- [start:embed-extra-params]
+    use_activation: bool | None = Field(
+        default=None,
+        description="Whether to use activation for the pooler outputs. "
+        "`None` uses the pooler's default, which is `True` in most cases.",
+    )
    normalize: bool | None = Field(
        default=None,
-        description="Whether to normalize the embeddings outputs. Default is True.",
+        description="Deprecated; please pass `use_activation` instead.",
    )
    # --8<-- [end:embed-extra-params]

    def to_pooling_params(self):
+        if self.normalize is not None:
+            logger.warning_once(
+                "`normalize` is deprecated and will be removed in v0.17. "
+                "Please pass `use_activation` instead."
+            )
+            self.use_activation = self.normalize
+
        return PoolingParams(
            dimensions=self.dimensions,
-            use_activation=self.normalize,
+            use_activation=self.use_activation,
            truncate_prompt_tokens=getattr(self, "truncate_prompt_tokens", None),
        )


 class ClassifyRequestMixin(OpenAIBaseModel):
    # --8<-- [start:classify-extra-params]
-    softmax: bool | None = Field(
-        default=None,
-        description="softmax will be deprecated, please use use_activation instead.",
-    )
-    activation: bool | None = Field(
-        default=None,
-        description="activation will be deprecated, please use use_activation instead.",
-    )
    use_activation: bool | None = Field(
        default=None,
-        description="Whether to use activation for classification outputs. "
-        "Default is True.",
+        description="Whether to use activation for the pooler outputs. "
+        "`None` uses the pooler's default, which is `True` in most cases.",
    )
    # --8<-- [end:classify-extra-params]

    def to_pooling_params(self):
        return PoolingParams(
-            use_activation=get_use_activation(self),
+            use_activation=self.use_activation,
            truncate_prompt_tokens=getattr(self, "truncate_prompt_tokens", None),
        )
--- a/vllm/entrypoints/pooling/pooling/protocol.py
+++ b/vllm/entrypoints/pooling/pooling/protocol.py
@@ -7,7 +7,6 @@ from pydantic import Field

 from vllm import PoolingParams
 from vllm.config import ModelConfig
-from vllm.config.pooler import get_use_activation
 from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo
 from vllm.entrypoints.pooling.base.protocol import (
    ChatRequestMixin,
@@ -17,10 +16,13 @@ from vllm.entrypoints.pooling.base.protocol import (
    EncodingRequestMixin,
    PoolingBasicRequestMixin,
 )
+from vllm.logger import init_logger
 from vllm.renderers import TokenizeParams
 from vllm.tasks import PoolingTask
 from vllm.utils import random_uuid

+logger = init_logger(__name__)
+

 class PoolingCompletionRequest(
    PoolingBasicRequestMixin,
@@ -43,10 +45,17 @@ class PoolingCompletionRequest(
        )

    def to_pooling_params(self):
+        if self.normalize is not None:
+            logger.warning_once(
+                "`normalize` is deprecated and will be removed in v0.17. "
+                "Please pass `use_activation` instead."
+            )
+            self.use_activation = self.normalize
+
        return PoolingParams(
            truncate_prompt_tokens=self.truncate_prompt_tokens,
+            use_activation=self.use_activation,
            dimensions=self.dimensions,
-            use_activation=get_use_activation(self),
        )


@@ -73,10 +82,17 @@ class PoolingChatRequest(
        )

    def to_pooling_params(self):
+        if self.normalize is not None:
+            logger.warning_once(
+                "`normalize` is deprecated and will be removed in v0.17. "
+                "Please pass `use_activation` instead."
+            )
+            self.use_activation = self.normalize
+
        return PoolingParams(
            truncate_prompt_tokens=self.truncate_prompt_tokens,
+            use_activation=self.use_activation,
            dimensions=self.dimensions,
-            use_activation=get_use_activation(self),
        )


--- a/vllm/entrypoints/pooling/score/protocol.py
+++ b/vllm/entrypoints/pooling/score/protocol.py
@@ -7,7 +7,6 @@ from pydantic import BaseModel, Field

 from vllm import PoolingParams
 from vllm.config import ModelConfig
-from vllm.config.pooler import get_use_activation
 from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo
 from vllm.entrypoints.pooling.base.protocol import (
    ClassifyRequestMixin,
@@ -43,7 +42,7 @@ class ScoreRequestMixin(PoolingBasicRequestMixin, ClassifyRequestMixin):
    def to_pooling_params(self):
        return PoolingParams(
            truncate_prompt_tokens=self.truncate_prompt_tokens,
-            use_activation=get_use_activation(self),
+            use_activation=self.use_activation,
        )


--- a/vllm/model_executor/models/config.py
+++ b/vllm/model_executor/models/config.py
@@ -233,8 +233,8 @@ class Qwen2ForRewardModelConfig(VerifyAndUpdateConfig):
    def verify_and_update_model_config(model_config: "ModelConfig") -> None:
        pooler_config = model_config.pooler_config

-        if pooler_config.softmax is None:
-            pooler_config.softmax = False
+        if pooler_config.use_activation is None:
+            pooler_config.use_activation = False


 class Qwen3ForSequenceClassificationConfig(VerifyAndUpdateConfig):
--- a/vllm/pooling_params.py
+++ b/vllm/pooling_params.py
@@ -7,7 +7,6 @@ from typing import Annotated, Any
 import msgspec

 from vllm.config import ModelConfig, PoolerConfig
-from vllm.config.pooler import get_use_activation
 from vllm.sampling_params import RequestOutputKind
 from vllm.tasks import PoolingTask

@@ -24,30 +23,24 @@ class PoolingParams(
            Set to -1 to use the model's default truncation size.
            Set to k to keep only the last k tokens (left truncation).
            Set to None to disable truncation.
+        use_activation: Whether to apply activation function to the pooler outputs.
+            `None` uses the pooler's default, which is `True` in most cases.
        dimensions: Reduce the dimensions of embeddings
            if model support matryoshka representation.
-        normalize: Deprecated, please use use_activation instead.
-        softmax: Deprecated, please use use_activation instead.
-        activation: Deprecated, please use use_activation instead.
-        use_activation: Whether to apply activation function to
-            the classification outputs.
    """

    # --8<-- [start:common-pooling-params]
    truncate_prompt_tokens: Annotated[int, msgspec.Meta(ge=-1)] | None = None
+    use_activation: bool | None = None
    # --8<-- [end:common-pooling-params]

    ## for embeddings models
    # --8<-- [start:embed-pooling-params]
    dimensions: int | None = None
-    normalize: bool | None = None
    # --8<-- [end:embed-pooling-params]

    ## for classification, scoring and rerank
    # --8<-- [start:classify-pooling-params]
-    softmax: bool | None = None
-    activation: bool | None = None
-    use_activation: bool | None = None
    # --8<-- [end:classify-pooling-params]

    ## for step pooling models
@@ -88,9 +81,6 @@ class PoolingParams(
            msg = f"You cannot overwrite {self.task=!r} with {task=!r}!"
            raise ValueError(msg)

-        # raise deprecated warning for softmax and activation
-        self.use_activation = get_use_activation(self)
-
        # plugin task uses io_processor.parse_request to verify inputs,
        # skipping PoolingParams verify
        if self.task == "plugin":