[Refactor] Separate sequence and token pooling types (#32026)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-01-10 12:53:24 +08:00
committed by GitHub
parent 52d428295d
commit 583a90e005
42 changed files with 324 additions and 204 deletions

View File

@@ -162,8 +162,11 @@ def mteb_test_embed_models(
assert model_info.architecture in model_config.architectures
# Confirm whether the important configs in model_config are correct.
-if model_info.pooling_type is not None:
-    assert model_config.pooler_config.pooling_type == model_info.pooling_type
+pooler_config = model_config.pooler_config
+if model_info.seq_pooling_type is not None:
+    assert pooler_config.seq_pooling_type == model_info.seq_pooling_type
+if model_info.tok_pooling_type is not None:
+    assert pooler_config.tok_pooling_type == model_info.tok_pooling_type
if model_info.attn_type is not None:
assert model_config.attn_type == model_info.attn_type
if model_info.is_prefix_caching_supported is not None:

View File

@@ -254,8 +254,11 @@ def mteb_test_rerank_models(
assert model_config.hf_config.num_labels == 1
# Confirm whether the important configs in model_config are correct.
if model_info.pooling_type is not None:
assert model_config.pooler_config.pooling_type == model_info.pooling_type
+pooler_config = model_config.pooler_config
+if model_info.seq_pooling_type is not None:
+    assert pooler_config.seq_pooling_type == model_info.seq_pooling_type
+if model_info.tok_pooling_type is not None:
+    assert pooler_config.tok_pooling_type == model_info.tok_pooling_type
if model_info.attn_type is not None:
assert model_config.attn_type == model_info.attn_type
if model_info.is_prefix_caching_supported is not None:

View File

@@ -17,7 +17,7 @@ MODELS = [
"BAAI/bge-base-en",
architecture="BertModel",
mteb_score=0.779336792,
-pooling_type="CLS",
+seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -54,7 +54,7 @@ MODELS = [
"BAAI/bge-m3",
architecture="XLMRobertaModel",
mteb_score=0.787343078,
-pooling_type="CLS",
+seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -65,7 +65,7 @@ MODELS = [
"BAAI/bge-code-v1",
architecture="Qwen2Model",
mteb_score=0.75724465,
-pooling_type="LAST",
+seq_pooling_type="LAST",
attn_type="decoder",
is_prefix_caching_supported=True,
is_chunked_prefill_supported=True,
@@ -79,7 +79,7 @@ RERANK_MODELS = [
"BAAI/bge-reranker-base",
architecture="XLMRobertaForSequenceClassification",
mteb_score=0.32398,
-pooling_type="CLS",
+seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,

View File

@@ -26,7 +26,7 @@ RERANK_MODELS = [
"method": "no_post_processing",
},
mteb_score=0.33757,
-pooling_type="LAST",
+seq_pooling_type="LAST",
attn_type="decoder",
is_prefix_caching_supported=True,
is_chunked_prefill_supported=True,

View File

@@ -12,7 +12,7 @@ RERANK_MODELS = [
RerankModelInfo(
"cross-encoder/ms-marco-TinyBERT-L-2-v2",
architecture="BertForSequenceClassification",
-pooling_type="CLS",
+seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -21,7 +21,7 @@ RERANK_MODELS = [
RerankModelInfo(
"tomaarsen/Qwen3-Reranker-0.6B-seq-cls",
architecture="Qwen3ForSequenceClassification",
-pooling_type="LAST",
+seq_pooling_type="LAST",
attn_type="decoder",
is_prefix_caching_supported=True,
is_chunked_prefill_supported=True,

View File

@@ -18,7 +18,7 @@ MODELS = [
"thenlper/gte-large",
mteb_score=0.76807651,
architecture="BertModel",
-pooling_type="MEAN",
+seq_pooling_type="MEAN",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -44,7 +44,7 @@ MODELS = [
architecture="GteNewModel",
mteb_score=0.775074696,
hf_overrides={"architectures": ["GteNewModel"]},
-pooling_type="CLS",
+seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -67,7 +67,7 @@ MODELS = [
"Alibaba-NLP/gte-Qwen2-1.5B-instruct",
mteb_score=0.758473459018872,
architecture="Qwen2ForCausalLM",
-pooling_type="LAST",
+seq_pooling_type="LAST",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -78,7 +78,7 @@ MODELS = [
"Alibaba-NLP/gte-modernbert-base",
mteb_score=0.748193353,
architecture="ModernBertModel",
-pooling_type="CLS",
+seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -89,7 +89,7 @@ MODELS = [
"Qwen/Qwen3-Embedding-0.6B",
mteb_score=0.771163695,
architecture="Qwen3ForCausalLM",
-pooling_type="LAST",
+seq_pooling_type="LAST",
attn_type="decoder",
is_prefix_caching_supported=True,
is_chunked_prefill_supported=True,
@@ -108,7 +108,7 @@ RERANK_MODELS = [
"Alibaba-NLP/gte-reranker-modernbert-base",
mteb_score=0.33386,
architecture="ModernBertForSequenceClassification",
-pooling_type="CLS",
+seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -119,7 +119,7 @@ RERANK_MODELS = [
mteb_score=0.33062,
architecture="GteNewForSequenceClassification",
hf_overrides={"architectures": ["GteNewForSequenceClassification"]},
-pooling_type="CLS",
+seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,

View File

@@ -13,7 +13,7 @@ MODELS = [
"intfloat/e5-small",
architecture="BertModel",
mteb_score=0.742285423,
-pooling_type="MEAN",
+seq_pooling_type="MEAN",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -29,7 +29,7 @@ MODELS = [
"intfloat/multilingual-e5-base",
architecture="XLMRobertaModel",
mteb_score=0.779325955,
-pooling_type="MEAN",
+seq_pooling_type="MEAN",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,

View File

@@ -24,7 +24,7 @@ EMBEDDING_MODELS = [
mteb_score=0.824413164,
architecture="XLMRobertaModel",
is_matryoshka=True,
-pooling_type="MEAN",
+seq_pooling_type="MEAN",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -36,7 +36,7 @@ RERANK_MODELS = [
"jinaai/jina-reranker-v2-base-multilingual",
mteb_score=0.33643,
architecture="XLMRobertaForSequenceClassification",
-pooling_type="CLS",
+seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,

View File

@@ -24,7 +24,7 @@ RERANK_MODELS = [
"mixedbread-ai/mxbai-rerank-base-v2",
architecture="Qwen2ForSequenceClassification",
hf_overrides=mxbai_rerank_hf_overrides,
-pooling_type="LAST",
+seq_pooling_type="LAST",
attn_type="decoder",
is_prefix_caching_supported=True,
is_chunked_prefill_supported=True,

View File

@@ -19,7 +19,7 @@ EMBEDDING_MODELS = [
"nvidia/llama-nemotron-embed-1b-v2",
architecture="LlamaBidirectionalModel",
mteb_score=0.689164662128673,
-pooling_type="MEAN",
+seq_pooling_type="MEAN",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -32,7 +32,7 @@ RERANK_MODELS = [
architecture="LlamaBidirectionalForSequenceClassification",
chat_template_name="nemotron-rerank.jinja",
mteb_score=0.33994,
-pooling_type="MEAN",
+seq_pooling_type="MEAN",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,

View File

@@ -14,7 +14,7 @@ MODELS = [
architecture="NomicBertModel",
mteb_score=0.737568559,
enable_test=True,
-pooling_type="MEAN",
+seq_pooling_type="MEAN",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -32,7 +32,7 @@ MODELS = [
architecture="NomicBertModel",
mteb_score=0.715488912,
enable_test=True,
-pooling_type="MEAN",
+seq_pooling_type="MEAN",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,

View File

@@ -27,7 +27,7 @@ RERANK_MODELS = [
architecture="Qwen3ForSequenceClassification",
hf_overrides=qwen3_reranker_hf_overrides,
chat_template_name="qwen3_reranker.jinja",
-pooling_type="LAST",
+seq_pooling_type="LAST",
attn_type="decoder",
is_prefix_caching_supported=True,
is_chunked_prefill_supported=True,

View File

@@ -14,7 +14,7 @@ MODELS = [
is_matryoshka=False,
architecture="BertModel",
mteb_score=0.714927797,
-pooling_type="CLS",
+seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -37,7 +37,7 @@ MODELS = [
is_matryoshka=False,
architecture="NomicBertModel",
mteb_score=0.681146831,
-pooling_type="CLS",
+seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -54,7 +54,7 @@ MODELS = [
is_matryoshka=True,
architecture="BertModel",
mteb_score=0.649088363,
-pooling_type="CLS",
+seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -65,7 +65,7 @@ MODELS = [
is_matryoshka=True,
architecture="XLMRobertaModel",
mteb_score=0.712258299,
-pooling_type="CLS",
+seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -76,7 +76,7 @@ MODELS = [
is_matryoshka=True,
architecture="GteModel",
mteb_score=0.706622444,
-pooling_type="CLS",
+seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,

View File

@@ -14,7 +14,7 @@ ST_PROJECTOR_MODELS = [
"TencentBAC/Conan-embedding-v1",
architecture="BertModel",
mteb_score=0.688611955,
-pooling_type="MEAN",
+seq_pooling_type="MEAN",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -24,7 +24,7 @@ ST_PROJECTOR_MODELS = [
"google/embeddinggemma-300m",
architecture="Gemma3TextModel",
mteb_score=0.7473819294684156,
-pooling_type="MEAN",
+seq_pooling_type="MEAN",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,