diff --git a/docs/models/pooling_models.md b/docs/models/pooling_models.md index bf3b3e9c0..c1355fe49 100644 --- a/docs/models/pooling_models.md +++ b/docs/models/pooling_models.md @@ -352,15 +352,6 @@ We have split the `encode` task into two more specific token-wise tasks: `token_ - `token_embed` is the same as `embed`, using normalization as the activation. - `token_classify` is the same as `classify`, by default using softmax as the activation. -### Remove softmax from PoolingParams - -We are going to remove `softmax` and `activation` from `PoolingParams` in v0.15. Instead, use `use_activation`, since we allow `classify` and `token_classify` to use any activation function. - -### as_reward_model - -!!! warning - We are going to remove `--convert reward` in v0.15, use `--convert embed` instead. - Pooling models now default support all pooling, you can use it without any settings. - Extracting hidden states prefers using `token_embed` task. diff --git a/vllm/config/model.py b/vllm/config/model.py index 2f8a46072..563f8ac56 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -75,7 +75,7 @@ else: logger = init_logger(__name__) RunnerOption = Literal["auto", RunnerType] -ConvertType = Literal["none", "embed", "classify", "reward", "mm_encoder_only"] +ConvertType = Literal["none", "embed", "classify"] ConvertOption = Literal["auto", ConvertType] TokenizerMode = Literal["auto", "hf", "slow", "mistral", "deepseek_v32"] ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"] @@ -499,15 +499,6 @@ class ModelConfig: ) self.model_arch_config = self.get_model_arch_config() - if self.convert == "mm_encoder_only": - logger.warning_once( - "`--convert mm_encoder_only` is deprecated and " - "will be removed in v0.15. " - "Please use --mm-encoder-only` instead." - ) - mm_encoder_only = True - self.convert = "none" - architectures = self.architectures registry = self.registry is_generative_model = registry.is_text_generation_model(architectures, self) @@ -855,13 +846,6 @@ class ModelConfig: runner_type: RunnerType, convert: ConvertOption, ) -> ConvertType: - if convert == "reward": - logger.warning( - "`--convert reward` is deprecated and will be removed in v0.15. " - "Please use `--convert embed` instead." - ) - return "embed" - if convert != "auto": return convert diff --git a/vllm/config/pooler.py b/vllm/config/pooler.py index a3b1f1cbe..6d87ec908 100644 --- a/vllm/config/pooler.py +++ b/vllm/config/pooler.py @@ -45,11 +45,13 @@ class PoolerConfig: The pooling method used for tokenwise pooling. """ - ## for embeddings models - normalize: bool | None = None + use_activation: bool | None = None """ - DEPRECATED: please use `use_activation` instead. + Whether to apply activation function to the pooler outputs. + `None` uses the pooler's default, which is `True` in most cases. """ + + ## for embedding models dimensions: int | None = None """ Reduce the dimensions of embeddings if model @@ -73,19 +75,6 @@ class PoolerConfig: """ ## for classification models - softmax: float | None = None - """ - DEPRECATED: please use `use_activation` instead. - """ - activation: float | None = None - """ - DEPRECATED: please use `use_activation` instead. - """ - use_activation: bool | None = None - """ - Whether to apply activation function to the classification outputs. - Defaults to True. - """ logit_bias: float | None = None """ If provided, apply classification logit biases. Defaults to None. @@ -105,10 +94,7 @@ class PoolerConfig: `math-shepherd-mistral-7b-prm` model. """ - def __post_init__(self): - # raise deprecated warning for softmax and activation - self.use_activation = get_use_activation(self) - + def __post_init__(self) -> None: if pooling_type := self.pooling_type: if self.seq_pooling_type is not None: raise ValueError( @@ -161,28 +147,3 @@ class PoolerConfig: factors: list[Any] = [] hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest() return hash_str - - -def get_use_activation(o: object): - if (normalize := getattr(o, "normalize", None)) is not None: - logger.warning_once( - "`normalize` is deprecated and will be removed in v0.15. " - "Please use `use_activation` instead." - ) - return normalize - - if (softmax := getattr(o, "softmax", None)) is not None: - logger.warning_once( - "`softmax` is deprecated and will be removed in v0.15. " - "Please use `use_activation` instead." - ) - return softmax - - if (activation := getattr(o, "activation", None)) is not None: - logger.warning_once( - "`activation` is deprecated and will be removed in v0.15. " - "Please use `use_activation` instead." - ) - return activation - - return getattr(o, "use_activation", None) diff --git a/vllm/entrypoints/pooling/base/protocol.py b/vllm/entrypoints/pooling/base/protocol.py index 19a44a361..f27025970 100644 --- a/vllm/entrypoints/pooling/base/protocol.py +++ b/vllm/entrypoints/pooling/base/protocol.py @@ -7,16 +7,18 @@ from typing import Annotated, Any from pydantic import Field, model_validator from vllm import PoolingParams -from vllm.config.pooler import get_use_activation from vllm.entrypoints.chat_utils import ( ChatCompletionMessageParam, ChatTemplateContentFormatOption, ) from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel +from vllm.logger import init_logger from vllm.renderers import ChatParams, merge_kwargs from vllm.utils import random_uuid from vllm.utils.serial_utils import EmbedDType, EncodingFormat, Endianness +logger = init_logger(__name__) + class PoolingBasicRequestMixin(OpenAIBaseModel): # --8<-- [start:pooling-common-params] @@ -172,39 +174,43 @@ class EmbedRequestMixin(EncodingRequestMixin): # --8<-- [end:embed-params] # --8<-- [start:embed-extra-params] + use_activation: bool | None = Field( + default=None, + description="Whether to use activation for the pooler outputs. " + "`None` uses the pooler's default, which is `True` in most cases.", + ) normalize: bool | None = Field( default=None, - description="Whether to normalize the embeddings outputs. Default is True.", + description="Deprecated; please pass `use_activation` instead", ) # --8<-- [end:embed-extra-params] def to_pooling_params(self): + if self.normalize is not None: + logger.warning_once( + "`normalize` is deprecated and will be removed in v0.17. " + "Please pass `use_activation` instead." + ) + self.use_activation = self.normalize + return PoolingParams( dimensions=self.dimensions, - use_activation=self.normalize, + use_activation=self.use_activation, truncate_prompt_tokens=getattr(self, "truncate_prompt_tokens", None), ) class ClassifyRequestMixin(OpenAIBaseModel): # --8<-- [start:classify-extra-params] - softmax: bool | None = Field( - default=None, - description="softmax will be deprecated, please use use_activation instead.", - ) - activation: bool | None = Field( - default=None, - description="activation will be deprecated, please use use_activation instead.", - ) use_activation: bool | None = Field( default=None, - description="Whether to use activation for classification outputs. " - "Default is True.", + description="Whether to use activation for the pooler outputs. " + "`None` uses the pooler's default, which is `True` in most cases.", ) # --8<-- [end:classify-extra-params] def to_pooling_params(self): return PoolingParams( - use_activation=get_use_activation(self), + use_activation=self.use_activation, truncate_prompt_tokens=getattr(self, "truncate_prompt_tokens", None), ) diff --git a/vllm/entrypoints/pooling/pooling/protocol.py b/vllm/entrypoints/pooling/pooling/protocol.py index 633d0bb85..4818f851c 100644 --- a/vllm/entrypoints/pooling/pooling/protocol.py +++ b/vllm/entrypoints/pooling/pooling/protocol.py @@ -7,7 +7,6 @@ from pydantic import Field from vllm import PoolingParams from vllm.config import ModelConfig -from vllm.config.pooler import get_use_activation from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo from vllm.entrypoints.pooling.base.protocol import ( ChatRequestMixin, @@ -17,10 +16,13 @@ from vllm.entrypoints.pooling.base.protocol import ( EncodingRequestMixin, PoolingBasicRequestMixin, ) +from vllm.logger import init_logger from vllm.renderers import TokenizeParams from vllm.tasks import PoolingTask from vllm.utils import random_uuid +logger = init_logger(__name__) + class PoolingCompletionRequest( PoolingBasicRequestMixin, @@ -43,10 +45,17 @@ class PoolingCompletionRequest( ) def to_pooling_params(self): + if self.normalize is not None: + logger.warning_once( + "`normalize` is deprecated and will be removed in v0.17. " + "Please pass `use_activation` instead." + ) + self.use_activation = self.normalize + return PoolingParams( truncate_prompt_tokens=self.truncate_prompt_tokens, + use_activation=self.use_activation, dimensions=self.dimensions, - use_activation=get_use_activation(self), ) @@ -73,10 +82,17 @@ class PoolingChatRequest( ) def to_pooling_params(self): + if self.normalize is not None: + logger.warning_once( + "`normalize` is deprecated and will be removed in v0.17. " + "Please pass `use_activation` instead." + ) + self.use_activation = self.normalize + return PoolingParams( truncate_prompt_tokens=self.truncate_prompt_tokens, + use_activation=self.use_activation, dimensions=self.dimensions, - use_activation=get_use_activation(self), ) diff --git a/vllm/entrypoints/pooling/score/protocol.py b/vllm/entrypoints/pooling/score/protocol.py index e080ffd67..8f30126b3 100644 --- a/vllm/entrypoints/pooling/score/protocol.py +++ b/vllm/entrypoints/pooling/score/protocol.py @@ -7,7 +7,6 @@ from pydantic import BaseModel, Field from vllm import PoolingParams from vllm.config import ModelConfig -from vllm.config.pooler import get_use_activation from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo from vllm.entrypoints.pooling.base.protocol import ( ClassifyRequestMixin, @@ -43,7 +42,7 @@ class ScoreRequestMixin(PoolingBasicRequestMixin, ClassifyRequestMixin): def to_pooling_params(self): return PoolingParams( truncate_prompt_tokens=self.truncate_prompt_tokens, - use_activation=get_use_activation(self), + use_activation=self.use_activation, ) diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py index 0ed437855..c41f5e18b 100644 --- a/vllm/model_executor/models/config.py +++ b/vllm/model_executor/models/config.py @@ -233,8 +233,8 @@ class Qwen2ForRewardModelConfig(VerifyAndUpdateConfig): def verify_and_update_model_config(model_config: "ModelConfig") -> None: pooler_config = model_config.pooler_config - if pooler_config.softmax is None: - pooler_config.softmax = False + if pooler_config.use_activation is None: + pooler_config.use_activation = False class Qwen3ForSequenceClassificationConfig(VerifyAndUpdateConfig): diff --git a/vllm/pooling_params.py b/vllm/pooling_params.py index 02e36eda4..1beb6906b 100644 --- a/vllm/pooling_params.py +++ b/vllm/pooling_params.py @@ -7,7 +7,6 @@ from typing import Annotated, Any import msgspec from vllm.config import ModelConfig, PoolerConfig -from vllm.config.pooler import get_use_activation from vllm.sampling_params import RequestOutputKind from vllm.tasks import PoolingTask @@ -24,30 +23,24 @@ class PoolingParams( Set to -1 to use the model's default truncation size. Set to k to keep only the last k tokens (left truncation). Set to None to disable truncation. + use_activation: Whether to apply activation function to the pooler outputs. + `None` uses the pooler's default, which is `True` in most cases. dimensions: Reduce the dimensions of embeddings if model support matryoshka representation. - normalize: Deprecated, please use use_activation instead. - softmax: Deprecated, please use use_activation instead. - activation: Deprecated, please use use_activation instead. - use_activation: Whether to apply activation function to - the classification outputs. """ # --8<-- [start:common-pooling-params] truncate_prompt_tokens: Annotated[int, msgspec.Meta(ge=-1)] | None = None + use_activation: bool | None = None # --8<-- [end:common-pooling-params] ## for embeddings models # --8<-- [start:embed-pooling-params] dimensions: int | None = None - normalize: bool | None = None # --8<-- [end:embed-pooling-params] ## for classification, scoring and rerank # --8<-- [start:classify-pooling-params] - softmax: bool | None = None - activation: bool | None = None - use_activation: bool | None = None # --8<-- [end:classify-pooling-params] ## for step pooling models @@ -88,9 +81,6 @@ class PoolingParams( msg = f"You cannot overwrite {self.task=!r} with {task=!r}!" raise ValueError(msg) - # raise deprecated warning for softmax and activation - self.use_activation = get_use_activation(self) - # plugin task uses io_processor.parse_request to verify inputs, # skipping PoolingParams verify if self.task == "plugin":