[Model] Pooling models default to using chunked prefill & prefix caching if supported. (#20930)

Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
wang.yuqi
2025-08-12 00:41:37 +08:00
committed by GitHub
parent 16fb668b61
commit 84cf78acee
31 changed files with 452 additions and 261 deletions

View File

@@ -6,20 +6,22 @@ import pytest
from vllm import PoolingParams
from ...utils import EmbedModelInfo, RerankModelInfo
from ...utils import (CLSPoolingEmbedModelInfo, CLSPoolingRerankModelInfo,
EmbedModelInfo, RerankModelInfo)
from .embed_utils import (check_embeddings_close,
correctness_test_embed_models, matryoshka_fy)
from .mteb_utils import mteb_test_embed_models, mteb_test_rerank_models
EMBEDDING_MODELS = [
EmbedModelInfo("jinaai/jina-embeddings-v3",
architecture="XLMRobertaModel",
is_matryoshka=True)
CLSPoolingEmbedModelInfo("jinaai/jina-embeddings-v3",
architecture="XLMRobertaModel",
is_matryoshka=True)
]
RERANK_MODELS = [
RerankModelInfo("jinaai/jina-reranker-v2-base-multilingual",
architecture="XLMRobertaForSequenceClassification")
CLSPoolingRerankModelInfo(
"jinaai/jina-reranker-v2-base-multilingual",
architecture="XLMRobertaForSequenceClassification")
]