[Model][1/N] Automatic conversion of CrossEncoding model (#20012)

Signed-off-by: wang.yuqi <noooop@126.com>
2025-06-27 12:10:04 +08:00
parent e110930680
commit cd4cfee689
5 changed files with 239 additions and 167 deletions
--- a/vllm/model_executor/models/qwen3.py
+++ b/vllm/model_executor/models/qwen3.py
@@ -400,22 +400,10 @@ class Qwen3ForSequenceClassification(nn.Module, SupportsLoRA,

    def load_weights_from_original_qwen3_reranker(
            self, weights: Iterable[tuple[str, torch.Tensor]]):
-        tokens = getattr(self.config, "classifier_from_token", None)
-        assert tokens is not None and len(tokens) == 2, \
-            ("Try loading the original Qwen3 Reranker?, see: "
-             "https://github.com/vllm-project/vllm/tree/main/examples/offline_inference/qwen3_reranker.py")

-        self.config.num_labels = 1
        model_config = self.vllm_config.model_config
-
+        tokens = getattr(self.config, "classifier_from_token", None)
        device = self.score.weight.device
-        self.score = RowParallelLinear(self.config.hidden_size,
-                                       self.config.num_labels,
-                                       quant_config=self.quant_config,
-                                       input_is_parallel=False,
-                                       bias=False,
-                                       prefix=maybe_prefix(
-                                           self.prefix, "score")).to(device)

        if self.config.tie_word_embeddings:
            self.lm_head = self.model.embed_tokens
@@ -443,5 +431,6 @@ class Qwen3ForSequenceClassification(nn.Module, SupportsLoRA,
        self.score.weight.data.copy_(weight)

        del self.lm_head
-        loaded_weights.add("classifier.weight")
+        loaded_weights.add("score.weight")
        loaded_weights.discard("lm_head.weight")
+        return loaded_weights