Fix pooling adapters for Transformers backend (#27338)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-24 04:23:55 +01:00
parent 70022ffc00
commit 1f9460c4c1
6 changed files with 97 additions and 74 deletions
--- a/vllm/model_executor/models/transformers/pooling.py
+++ b/vllm/model_executor/models/transformers/pooling.py
@@ -21,6 +21,7 @@ from typing import TYPE_CHECKING
 import torch
 from transformers import AutoModelForSequenceClassification

+from vllm.config.utils import getattr_iter
 from vllm.model_executor.layers.pooler import (
    ClassifierPooler,
    CLSPool,
@@ -82,14 +83,14 @@ class SequenceClassificationMixin(SupportsCrossEncoding, VllmModelForPooling):
            if hasattr(module, "pooler") and module.pooler is None:
                self.model.pooler = None
                break
-        if self.model.pooler is not None:
-            raise ValueError(
-                "Sequence classification models with pooling layers are not "
-                "supported yet in the Transformers backend."
-            )

        # Unlike `lm_head`, `classifier` is not always `nn.Linear`.
-        self.classifier = seq_cls_model.classifier
+        self.classifier = getattr_iter(seq_cls_model, ["classifier", "score"], None)
+        if self.classifier is None:
+            raise ValueError(
+                "Could not find `classifier` or `score` layer in the "
+                "`AutoModelForSequenceClassification` instance."
+            )
        self.init_parameters(self.classifier, dtype=self.model_config.head_dtype)

        class ClassifierWithReshape(self.classifier.__class__):