[Deprecation] Remove deprecated items related to pooling (#33477)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -352,15 +352,6 @@ We have split the `encode` task into two more specific token-wise tasks: `token_
|
||||
- `token_embed` is the same as `embed`, using normalization as the activation.
|
||||
- `token_classify` is the same as `classify`, by default using softmax as the activation.
|
||||
|
||||
### Remove softmax from PoolingParams
|
||||
|
||||
We are going to remove `softmax` and `activation` from `PoolingParams` in v0.15. Instead, use `use_activation`, since we allow `classify` and `token_classify` to use any activation function.
|
||||
|
||||
### as_reward_model
|
||||
|
||||
!!! warning
|
||||
We are going to remove `--convert reward` in v0.15; use `--convert embed` instead.
|
||||
|
||||
Pooling models now support all pooling by default, so you can use them without any additional settings.
|
||||
|
||||
- To extract hidden states, prefer the `token_embed` task.
|
||||
|
||||
@@ -75,7 +75,7 @@ else:
|
||||
logger = init_logger(__name__)
|
||||
|
||||
RunnerOption = Literal["auto", RunnerType]
|
||||
ConvertType = Literal["none", "embed", "classify", "reward", "mm_encoder_only"]
|
||||
ConvertType = Literal["none", "embed", "classify"]
|
||||
ConvertOption = Literal["auto", ConvertType]
|
||||
TokenizerMode = Literal["auto", "hf", "slow", "mistral", "deepseek_v32"]
|
||||
ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"]
|
||||
@@ -499,15 +499,6 @@ class ModelConfig:
|
||||
)
|
||||
self.model_arch_config = self.get_model_arch_config()
|
||||
|
||||
if self.convert == "mm_encoder_only":
|
||||
logger.warning_once(
|
||||
"`--convert mm_encoder_only` is deprecated and "
|
||||
"will be removed in v0.15. "
|
||||
"Please use --mm-encoder-only` instead."
|
||||
)
|
||||
mm_encoder_only = True
|
||||
self.convert = "none"
|
||||
|
||||
architectures = self.architectures
|
||||
registry = self.registry
|
||||
is_generative_model = registry.is_text_generation_model(architectures, self)
|
||||
@@ -855,13 +846,6 @@ class ModelConfig:
|
||||
runner_type: RunnerType,
|
||||
convert: ConvertOption,
|
||||
) -> ConvertType:
|
||||
if convert == "reward":
|
||||
logger.warning(
|
||||
"`--convert reward` is deprecated and will be removed in v0.15. "
|
||||
"Please use `--convert embed` instead."
|
||||
)
|
||||
return "embed"
|
||||
|
||||
if convert != "auto":
|
||||
return convert
|
||||
|
||||
|
||||
@@ -45,11 +45,13 @@ class PoolerConfig:
|
||||
The pooling method used for tokenwise pooling.
|
||||
"""
|
||||
|
||||
## for embeddings models
|
||||
normalize: bool | None = None
|
||||
use_activation: bool | None = None
|
||||
"""
|
||||
DEPRECATED: please use `use_activation` instead.
|
||||
Whether to apply activation function to the pooler outputs.
|
||||
`None` uses the pooler's default, which is `True` in most cases.
|
||||
"""
|
||||
|
||||
## for embedding models
|
||||
dimensions: int | None = None
|
||||
"""
|
||||
Reduce the dimensions of embeddings if model
|
||||
@@ -73,19 +75,6 @@ class PoolerConfig:
|
||||
"""
|
||||
|
||||
## for classification models
|
||||
softmax: float | None = None
|
||||
"""
|
||||
DEPRECATED: please use `use_activation` instead.
|
||||
"""
|
||||
activation: float | None = None
|
||||
"""
|
||||
DEPRECATED: please use `use_activation` instead.
|
||||
"""
|
||||
use_activation: bool | None = None
|
||||
"""
|
||||
Whether to apply activation function to the classification outputs.
|
||||
Defaults to True.
|
||||
"""
|
||||
logit_bias: float | None = None
|
||||
"""
|
||||
If provided, apply classification logit biases. Defaults to None.
|
||||
@@ -105,10 +94,7 @@ class PoolerConfig:
|
||||
`math-shepherd-mistral-7b-prm` model.
|
||||
"""
|
||||
|
||||
def __post_init__(self):
|
||||
# raise deprecated warning for softmax and activation
|
||||
self.use_activation = get_use_activation(self)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if pooling_type := self.pooling_type:
|
||||
if self.seq_pooling_type is not None:
|
||||
raise ValueError(
|
||||
@@ -161,28 +147,3 @@ class PoolerConfig:
|
||||
factors: list[Any] = []
|
||||
hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest()
|
||||
return hash_str
|
||||
|
||||
|
||||
def get_use_activation(o: object):
|
||||
if (normalize := getattr(o, "normalize", None)) is not None:
|
||||
logger.warning_once(
|
||||
"`normalize` is deprecated and will be removed in v0.15. "
|
||||
"Please use `use_activation` instead."
|
||||
)
|
||||
return normalize
|
||||
|
||||
if (softmax := getattr(o, "softmax", None)) is not None:
|
||||
logger.warning_once(
|
||||
"`softmax` is deprecated and will be removed in v0.15. "
|
||||
"Please use `use_activation` instead."
|
||||
)
|
||||
return softmax
|
||||
|
||||
if (activation := getattr(o, "activation", None)) is not None:
|
||||
logger.warning_once(
|
||||
"`activation` is deprecated and will be removed in v0.15. "
|
||||
"Please use `use_activation` instead."
|
||||
)
|
||||
return activation
|
||||
|
||||
return getattr(o, "use_activation", None)
|
||||
|
||||
@@ -7,16 +7,18 @@ from typing import Annotated, Any
|
||||
from pydantic import Field, model_validator
|
||||
|
||||
from vllm import PoolingParams
|
||||
from vllm.config.pooler import get_use_activation
|
||||
from vllm.entrypoints.chat_utils import (
|
||||
ChatCompletionMessageParam,
|
||||
ChatTemplateContentFormatOption,
|
||||
)
|
||||
from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel
|
||||
from vllm.logger import init_logger
|
||||
from vllm.renderers import ChatParams, merge_kwargs
|
||||
from vllm.utils import random_uuid
|
||||
from vllm.utils.serial_utils import EmbedDType, EncodingFormat, Endianness
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
class PoolingBasicRequestMixin(OpenAIBaseModel):
|
||||
# --8<-- [start:pooling-common-params]
|
||||
@@ -172,39 +174,43 @@ class EmbedRequestMixin(EncodingRequestMixin):
|
||||
# --8<-- [end:embed-params]
|
||||
|
||||
# --8<-- [start:embed-extra-params]
|
||||
use_activation: bool | None = Field(
|
||||
default=None,
|
||||
description="Whether to use activation for the pooler outputs. "
|
||||
"`None` uses the pooler's default, which is `True` in most cases.",
|
||||
)
|
||||
normalize: bool | None = Field(
|
||||
default=None,
|
||||
description="Whether to normalize the embeddings outputs. Default is True.",
|
||||
description="Deprecated; please pass `use_activation` instead",
|
||||
)
|
||||
# --8<-- [end:embed-extra-params]
|
||||
|
||||
def to_pooling_params(self):
|
||||
if self.normalize is not None:
|
||||
logger.warning_once(
|
||||
"`normalize` is deprecated and will be removed in v0.17. "
|
||||
"Please pass `use_activation` instead."
|
||||
)
|
||||
self.use_activation = self.normalize
|
||||
|
||||
return PoolingParams(
|
||||
dimensions=self.dimensions,
|
||||
use_activation=self.normalize,
|
||||
use_activation=self.use_activation,
|
||||
truncate_prompt_tokens=getattr(self, "truncate_prompt_tokens", None),
|
||||
)
|
||||
|
||||
|
||||
class ClassifyRequestMixin(OpenAIBaseModel):
|
||||
# --8<-- [start:classify-extra-params]
|
||||
softmax: bool | None = Field(
|
||||
default=None,
|
||||
description="softmax will be deprecated, please use use_activation instead.",
|
||||
)
|
||||
activation: bool | None = Field(
|
||||
default=None,
|
||||
description="activation will be deprecated, please use use_activation instead.",
|
||||
)
|
||||
use_activation: bool | None = Field(
|
||||
default=None,
|
||||
description="Whether to use activation for classification outputs. "
|
||||
"Default is True.",
|
||||
description="Whether to use activation for the pooler outputs. "
|
||||
"`None` uses the pooler's default, which is `True` in most cases.",
|
||||
)
|
||||
# --8<-- [end:classify-extra-params]
|
||||
|
||||
def to_pooling_params(self):
|
||||
return PoolingParams(
|
||||
use_activation=get_use_activation(self),
|
||||
use_activation=self.use_activation,
|
||||
truncate_prompt_tokens=getattr(self, "truncate_prompt_tokens", None),
|
||||
)
|
||||
|
||||
@@ -7,7 +7,6 @@ from pydantic import Field
|
||||
|
||||
from vllm import PoolingParams
|
||||
from vllm.config import ModelConfig
|
||||
from vllm.config.pooler import get_use_activation
|
||||
from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo
|
||||
from vllm.entrypoints.pooling.base.protocol import (
|
||||
ChatRequestMixin,
|
||||
@@ -17,10 +16,13 @@ from vllm.entrypoints.pooling.base.protocol import (
|
||||
EncodingRequestMixin,
|
||||
PoolingBasicRequestMixin,
|
||||
)
|
||||
from vllm.logger import init_logger
|
||||
from vllm.renderers import TokenizeParams
|
||||
from vllm.tasks import PoolingTask
|
||||
from vllm.utils import random_uuid
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
class PoolingCompletionRequest(
|
||||
PoolingBasicRequestMixin,
|
||||
@@ -43,10 +45,17 @@ class PoolingCompletionRequest(
|
||||
)
|
||||
|
||||
def to_pooling_params(self):
|
||||
if self.normalize is not None:
|
||||
logger.warning_once(
|
||||
"`normalize` is deprecated and will be removed in v0.17. "
|
||||
"Please pass `use_activation` instead."
|
||||
)
|
||||
self.use_activation = self.normalize
|
||||
|
||||
return PoolingParams(
|
||||
truncate_prompt_tokens=self.truncate_prompt_tokens,
|
||||
use_activation=self.use_activation,
|
||||
dimensions=self.dimensions,
|
||||
use_activation=get_use_activation(self),
|
||||
)
|
||||
|
||||
|
||||
@@ -73,10 +82,17 @@ class PoolingChatRequest(
|
||||
)
|
||||
|
||||
def to_pooling_params(self):
|
||||
if self.normalize is not None:
|
||||
logger.warning_once(
|
||||
"`normalize` is deprecated and will be removed in v0.17. "
|
||||
"Please pass `use_activation` instead."
|
||||
)
|
||||
self.use_activation = self.normalize
|
||||
|
||||
return PoolingParams(
|
||||
truncate_prompt_tokens=self.truncate_prompt_tokens,
|
||||
use_activation=self.use_activation,
|
||||
dimensions=self.dimensions,
|
||||
use_activation=get_use_activation(self),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@ from pydantic import BaseModel, Field
|
||||
|
||||
from vllm import PoolingParams
|
||||
from vllm.config import ModelConfig
|
||||
from vllm.config.pooler import get_use_activation
|
||||
from vllm.entrypoints.openai.engine.protocol import OpenAIBaseModel, UsageInfo
|
||||
from vllm.entrypoints.pooling.base.protocol import (
|
||||
ClassifyRequestMixin,
|
||||
@@ -43,7 +42,7 @@ class ScoreRequestMixin(PoolingBasicRequestMixin, ClassifyRequestMixin):
|
||||
def to_pooling_params(self):
|
||||
return PoolingParams(
|
||||
truncate_prompt_tokens=self.truncate_prompt_tokens,
|
||||
use_activation=get_use_activation(self),
|
||||
use_activation=self.use_activation,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -233,8 +233,8 @@ class Qwen2ForRewardModelConfig(VerifyAndUpdateConfig):
|
||||
def verify_and_update_model_config(model_config: "ModelConfig") -> None:
|
||||
pooler_config = model_config.pooler_config
|
||||
|
||||
if pooler_config.softmax is None:
|
||||
pooler_config.softmax = False
|
||||
if pooler_config.use_activation is None:
|
||||
pooler_config.use_activation = False
|
||||
|
||||
|
||||
class Qwen3ForSequenceClassificationConfig(VerifyAndUpdateConfig):
|
||||
|
||||
@@ -7,7 +7,6 @@ from typing import Annotated, Any
|
||||
import msgspec
|
||||
|
||||
from vllm.config import ModelConfig, PoolerConfig
|
||||
from vllm.config.pooler import get_use_activation
|
||||
from vllm.sampling_params import RequestOutputKind
|
||||
from vllm.tasks import PoolingTask
|
||||
|
||||
@@ -24,30 +23,24 @@ class PoolingParams(
|
||||
Set to -1 to use the model's default truncation size.
|
||||
Set to k to keep only the last k tokens (left truncation).
|
||||
Set to None to disable truncation.
|
||||
use_activation: Whether to apply activation function to the pooler outputs.
|
||||
`None` uses the pooler's default, which is `True` in most cases.
|
||||
dimensions: Reduce the dimensions of embeddings
|
||||
if model support matryoshka representation.
|
||||
normalize: Deprecated, please use use_activation instead.
|
||||
softmax: Deprecated, please use use_activation instead.
|
||||
activation: Deprecated, please use use_activation instead.
|
||||
use_activation: Whether to apply activation function to
|
||||
the classification outputs.
|
||||
"""
|
||||
|
||||
# --8<-- [start:common-pooling-params]
|
||||
truncate_prompt_tokens: Annotated[int, msgspec.Meta(ge=-1)] | None = None
|
||||
use_activation: bool | None = None
|
||||
# --8<-- [end:common-pooling-params]
|
||||
|
||||
## for embeddings models
|
||||
# --8<-- [start:embed-pooling-params]
|
||||
dimensions: int | None = None
|
||||
normalize: bool | None = None
|
||||
# --8<-- [end:embed-pooling-params]
|
||||
|
||||
## for classification, scoring and rerank
|
||||
# --8<-- [start:classify-pooling-params]
|
||||
softmax: bool | None = None
|
||||
activation: bool | None = None
|
||||
use_activation: bool | None = None
|
||||
# --8<-- [end:classify-pooling-params]
|
||||
|
||||
## for step pooling models
|
||||
@@ -88,9 +81,6 @@ class PoolingParams(
|
||||
msg = f"You cannot overwrite {self.task=!r} with {task=!r}!"
|
||||
raise ValueError(msg)
|
||||
|
||||
# raise deprecated warning for softmax and activation
|
||||
self.use_activation = get_use_activation(self)
|
||||
|
||||
# plugin task uses io_processor.parse_request to verify inputs,
|
||||
# skipping PoolingParams verify
|
||||
if self.task == "plugin":
|
||||
|
||||
Reference in New Issue
Block a user