[Model] Deprecate the score task (this will not affect users). (#37537)

Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
2026-03-20 16:07:56 +08:00
parent dcee9be95a
commit ed359c497a
22 changed files with 184 additions and 163 deletions
--- a/vllm/model_executor/layers/pooler/activations.py
+++ b/vllm/model_executor/layers/pooler/activations.py
@@ -16,25 +16,22 @@ from vllm.utils.import_utils import resolve_obj_by_qualname
 logger = init_logger(__name__)


-def get_classification_act_fn(
+def get_act_fn(
    config: PretrainedConfig,
+    static_num_labels: bool = True,
 ) -> "PoolerActivation":
+    # Classification path: choose the activation from config.problem_type.
    # Implement alignment with transformers ForSequenceClassificationLoss
    # https://github.com/huggingface/transformers/blob/57bb6db6ee4cfaccc45b8d474dfad5a17811ca60/src/transformers/loss/loss_utils.py#L92
    problem_type = getattr(config, "problem_type", "")
    if problem_type == "regression":
        return PoolerIdentity()
    if problem_type == "single_label_classification":
-        return PoolerClassify()
+        return PoolerClassify(static_num_labels=static_num_labels)
    if problem_type == "multi_label_classification":
        return PoolerMultiLabelClassify()

-    return PoolerClassify()
-
-
-def get_cross_encoder_act_fn(
-    config: PretrainedConfig,
-) -> "PoolerActivation":
+    # No recognized problem_type: fall back to the cross-encoder activation lookup.
    function_name: str | None = None
    if (
        hasattr(config, "sentence_transformers")
@@ -55,24 +52,16 @@ def get_cross_encoder_act_fn(
        fn = resolve_obj_by_qualname(function_name)()
        return PoolerActivation.wraps(fn)

-    return PoolerClassify()
+    return PoolerClassify(static_num_labels=static_num_labels)


 def resolve_classifier_act_fn(
    model_config: ModelConfig,
    static_num_labels: bool = True,
-    act_fn: "PoolerActivation | str | None" = None,
+    act_fn: "PoolerActivation | None" = None,
 ):
-    if isinstance(act_fn, str):
-        if act_fn == "classify":
-            return get_classification_act_fn(model_config.hf_config)
-        if act_fn == "score":
-            return get_cross_encoder_act_fn(model_config.hf_config)
-
-        raise ValueError(f"act_fn [{act_fn=}] not supported.")
-
    if act_fn is None:
-        return PoolerClassify(static_num_labels=static_num_labels)
+        return get_act_fn(model_config.hf_config, static_num_labels)

    assert callable(act_fn)
    return act_fn
@@ -97,9 +86,8 @@ class PoolerActivation(nn.Module, ABC):

    def forward(self, pooled_data: _T) -> _T:
        # shape:
-        # classify (& score) -> (batch_size, num_classes)
-        # embed -> (batch_size, embedding_dim) or list(embedding_dim)
-        #          (batch_size, dimensions) or list(dimensions) if using MRL
+        # classify -> (batch_size, num_classes)
+        # embed -> (batch_size, embedding_size) or list(embedding_size)
        if isinstance(pooled_data, list):
            return [self.forward_chunk(data) for data in pooled_data]

--- a/vllm/model_executor/layers/pooler/seqwise/heads.py
+++ b/vllm/model_executor/layers/pooler/seqwise/heads.py
@@ -56,29 +56,31 @@ class EmbeddingPoolerHead(SequencePoolerHead):

        if isinstance(pooled_data, list):
            pooled_data = torch.stack(pooled_data)
-        # pooled_data shape: [batchsize, hidden_dimension]
+        # pooled_data shape: [batch_size, hidden_size]

        if self.head_dtype is not None:
            pooled_data = pooled_data.to(self.head_dtype)

        # Apply ST projector
        if self.projector is not None:
-            pooled_data = self.projector(pooled_data)
-        # pooled_data shape: [batchsize, embedding_dimension]
+            embeddings = self.projector(pooled_data)
+        else:
+            embeddings = pooled_data
+        # embeddings shape: [batch_size, embedding_size]

        # for matryoshka representation
        dimensions_list = [pooling_param.dimensions for pooling_param in pooling_params]
        if any(d is not None for d in dimensions_list):
            # change the output dimension
-            assert len(pooled_data) == len(dimensions_list)
-            if len(set(dimensions_list)) == 1 and not isinstance(pooled_data, list):
+            assert len(embeddings) == len(dimensions_list)
+            if len(set(dimensions_list)) == 1 and not isinstance(embeddings, list):
                # if all dimensions are the same
                d = dimensions_list[0]
-                pooled_data = pooled_data[..., :d]
+                embeddings = embeddings[..., :d]
            else:
-                pooled_data = [
+                embeddings = [
                    vecs if d is None else vecs[..., :d]
-                    for vecs, d in zip(pooled_data, dimensions_list)
+                    for vecs, d in zip(embeddings, dimensions_list)
                ]

        # for normalize
@@ -86,15 +88,15 @@ class EmbeddingPoolerHead(SequencePoolerHead):
            flags = [p.use_activation for p in pooling_params]
            if len(set(flags)) == 1:
                if flags[0]:
-                    pooled_data = self.activation(pooled_data)
+                    embeddings = self.activation(embeddings)
            else:
-                pooled_data = [
+                embeddings = [
                    self.activation(vecs) if f else vecs
-                    for vecs, f in zip(pooled_data, flags)
+                    for vecs, f in zip(embeddings, flags)
                ]

-        # pooled_data shape: [batchsize, embedding_dimension]
-        return pooled_data
+        # embeddings shape: [batch_size, embedding_size]
+        return embeddings


 class ClassifierPoolerHead(SequencePoolerHead):
@@ -113,7 +115,7 @@ class ClassifierPoolerHead(SequencePoolerHead):
        self.activation = activation

    def get_supported_tasks(self) -> Set[PoolingTask]:
-        return {"classify", "score"}
+        return {"classify"}

    def forward(
        self,
@@ -131,21 +133,23 @@ class ClassifierPoolerHead(SequencePoolerHead):
            pooled_data = pooled_data.to(self.head_dtype)

        if self.classifier is not None:
-            pooled_data = self.classifier(pooled_data)
-        # pooled_data shape: [batchsize, num_labels]
+            logits = self.classifier(pooled_data)
+        else:
+            logits = pooled_data

+        # logits shape: [batch_size, num_labels]
        if self.logit_bias is not None:
-            pooled_data -= self.logit_bias
+            logits -= self.logit_bias

        if self.activation is not None:
            flags = [p.use_activation for p in pooling_params]
            if len(set(flags)) == 1:
-                pooled_data = self.activation(pooled_data) if flags[0] else pooled_data
+                logits = self.activation(logits) if flags[0] else logits
            else:
-                pooled_data = [
+                logits = [
                    self.activation(vecs) if f else vecs
-                    for vecs, f in zip(pooled_data, flags)
+                    for vecs, f in zip(logits, flags)
                ]

-        # pooled_data shape: [batchsize, num_labels]
-        return pooled_data
+        # logits shape: [batch_size, num_labels]
+        return logits
--- a/vllm/model_executor/layers/pooler/seqwise/methods.py
+++ b/vllm/model_executor/layers/pooler/seqwise/methods.py
@@ -17,7 +17,7 @@ SequencePoolingMethodOutput: TypeAlias = torch.Tensor | list[torch.Tensor]

 class SequencePoolingMethod(nn.Module, ABC):
    def get_supported_tasks(self) -> Set[PoolingTask]:
-        return {"token_embed", "token_classify", "embed", "classify", "score"}
+        return {"token_embed", "token_classify", "embed", "classify"}

    def get_pooling_updates(self, task: PoolingTask) -> PoolingParamsUpdate:
        return PoolingParamsUpdate()
--- a/vllm/model_executor/layers/pooler/seqwise/poolers.py
+++ b/vllm/model_executor/layers/pooler/seqwise/poolers.py
@@ -108,7 +108,7 @@ def pooler_for_classify(
    *,
    pooling: SequencePoolingMethod | SequencePoolingFn | None = None,
    classifier: ClassifierFn | None = None,
-    act_fn: PoolerActivation | str | None = None,
+    act_fn: PoolerActivation | None = None,
 ):
    if pooling is None:
        pooling = get_seq_pooling_method(pooler_config.get_seq_pooling_type())
--- a/vllm/model_executor/layers/pooler/special.py
+++ b/vllm/model_executor/layers/pooler/special.py
@@ -52,13 +52,6 @@ class DispatchPooler(Pooler):
                    pooler_config,
                    pooling=pooling,
                    classifier=classifier,
-                    act_fn="classify",
-                ),
-                "score": pooler_for_classify(
-                    pooler_config,
-                    pooling=pooling,
-                    classifier=classifier,
-                    act_fn="score",
                ),
            }
        )
@@ -115,7 +108,7 @@ class DispatchPooler(Pooler):

 class IdentityPooler(Pooler):
    def get_supported_tasks(self) -> Set[PoolingTask]:
-        return {"plugin", "score"}
+        return {"plugin"}

    def forward(
        self,
--- a/vllm/model_executor/layers/pooler/tokwise/heads.py
+++ b/vllm/model_executor/layers/pooler/tokwise/heads.py
@@ -68,22 +68,24 @@ class TokenEmbeddingPoolerHead(TokenPoolerHead):

        if self.head_dtype is not None:
            pooled_data = pooled_data.to(self.head_dtype)
-        # pooled_data shape: [n_tokens, hidden_dimension]
+        # pooled_data shape: [n_tokens, hidden_size]

        # Apply ST projector
        if self.projector is not None:
-            pooled_data = self.projector(pooled_data)
-        # pooled_data shape: [n_tokens, embedding_dimension]
+            embeddings = self.projector(pooled_data)
+        else:
+            embeddings = pooled_data
+        # embeddings shape: [n_tokens, embedding_size]

        # for matryoshka representation
-        pooled_data = pooled_data[..., : pooling_param.dimensions]
+        embeddings = embeddings[..., : pooling_param.dimensions]

        # for normalize
        if self.activation is not None and pooling_param.use_activation:
-            pooled_data = self.activation(pooled_data)
+            embeddings = self.activation(embeddings)

-        # pooled_data shape: [n_tokens, embedding_dimension]
-        return pooled_data
+        # embeddings shape: [n_tokens, embedding_size]
+        return embeddings


 class TokenClassifierPoolerHead(TokenPoolerHead):
@@ -118,16 +120,16 @@ class TokenClassifierPoolerHead(TokenPoolerHead):
        # hidden_states shape: [n_token, hidden_size]

        if self.classifier is not None:
-            scores = self.classifier(pooled_data)
+            logits = self.classifier(pooled_data)
        else:
-            scores = pooled_data
-        # scores shape: [n_token, num_labels]
+            logits = pooled_data
+        # logits shape: [n_token, num_labels]

        if self.logit_bias is not None:
-            scores -= self.logit_bias
+            logits -= self.logit_bias

        if self.activation is not None and pooling_param.use_activation:
-            scores = self.activation(scores)
+            logits = self.activation(logits)

-        # scores shape: [n_token, num_labels]
-        return scores
+        # logits shape: [n_token, num_labels]
+        return logits
--- a/vllm/model_executor/layers/pooler/tokwise/poolers.py
+++ b/vllm/model_executor/layers/pooler/tokwise/poolers.py
@@ -116,7 +116,7 @@ def pooler_for_token_classify(
    *,
    pooling: TokenPoolingMethod | TokenPoolingFn | None = None,
    classifier: ClassifierFn | None = None,
-    act_fn: PoolerActivation | str | None = None,
+    act_fn: PoolerActivation | None = None,
 ):
    if pooling is None:
        pooling = get_tok_pooling_method(pooler_config.get_tok_pooling_type())