[Frontend] Support using chat template as custom score template for reranking models (#30550)

Signed-off-by: Jakub Zakrzewski <jzakrzewski@nvidia.com> Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io> Signed-off-by: wang.yuqi <noooop@126.com> Co-authored-by: wang.yuqi <yuqi.wang@daocloud.io>
2025-12-23 12:19:16 +01:00
parent 27c6c2f98c
commit 23daef548d
19 changed files with 663 additions and 46 deletions
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -203,6 +203,7 @@ _EMBEDDING_MODELS = {
    "GteNewModel": ("bert_with_rope", "GteNewModel"),
    "InternLM2ForRewardModel": ("internlm2", "InternLM2ForRewardModel"),
    "JambaForSequenceClassification": ("jamba", "JambaForSequenceClassification"),  # noqa: E501
+    "LlamaBidirectionalModel": ("llama", "LlamaBidirectionalModel"),
    "LlamaModel": ("llama", "LlamaForCausalLM"),
    **{
        # Multiple models share the same architecture, so we include them all
@@ -246,6 +247,11 @@ _CROSS_ENCODER_MODELS = {
        "bert_with_rope",
        "GteNewForSequenceClassification",
    ),
+    "JinaVLForRanking": ("jina_vl", "JinaVLForSequenceClassification"),
+    "LlamaBidirectionalForSequenceClassification": (
+        "llama",
+        "LlamaBidirectionalForSequenceClassification",
+    ),
    "ModernBertForSequenceClassification": (
        "modernbert",
        "ModernBertForSequenceClassification",
@@ -259,8 +265,6 @@ _CROSS_ENCODER_MODELS = {
        "roberta",
        "RobertaForSequenceClassification",
    ),
-    # [Auto-converted (see adapters.py)]
-    "JinaVLForRanking": ("jina_vl", "JinaVLForSequenceClassification"),  # noqa: E501,
 }

 _MULTIMODAL_MODELS = {