[Refactor] Separate sequence and token pooling types (#32026)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -162,8 +162,11 @@ def mteb_test_embed_models(
|
||||
assert model_info.architecture in model_config.architectures
|
||||
|
||||
# Confirm whether the important configs in model_config are correct.
|
||||
if model_info.pooling_type is not None:
|
||||
assert model_config.pooler_config.pooling_type == model_info.pooling_type
|
||||
pooler_config = model_config.pooler_config
|
||||
if model_info.seq_pooling_type is not None:
|
||||
assert pooler_config.seq_pooling_type == model_info.seq_pooling_type
|
||||
if model_info.tok_pooling_type is not None:
|
||||
assert pooler_config.tok_pooling_type == model_info.tok_pooling_type
|
||||
if model_info.attn_type is not None:
|
||||
assert model_config.attn_type == model_info.attn_type
|
||||
if model_info.is_prefix_caching_supported is not None:
|
||||
|
||||
@@ -254,8 +254,11 @@ def mteb_test_rerank_models(
|
||||
assert model_config.hf_config.num_labels == 1
|
||||
|
||||
# Confirm whether the important configs in model_config are correct.
|
||||
if model_info.pooling_type is not None:
|
||||
assert model_config.pooler_config.pooling_type == model_info.pooling_type
|
||||
pooler_config = model_config.pooler_config
|
||||
if model_info.seq_pooling_type is not None:
|
||||
assert pooler_config.seq_pooling_type == model_info.seq_pooling_type
|
||||
if model_info.tok_pooling_type is not None:
|
||||
assert pooler_config.tok_pooling_type == model_info.tok_pooling_type
|
||||
if model_info.attn_type is not None:
|
||||
assert model_config.attn_type == model_info.attn_type
|
||||
if model_info.is_prefix_caching_supported is not None:
|
||||
|
||||
@@ -17,7 +17,7 @@ MODELS = [
|
||||
"BAAI/bge-base-en",
|
||||
architecture="BertModel",
|
||||
mteb_score=0.779336792,
|
||||
pooling_type="CLS",
|
||||
seq_pooling_type="CLS",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -54,7 +54,7 @@ MODELS = [
|
||||
"BAAI/bge-m3",
|
||||
architecture="XLMRobertaModel",
|
||||
mteb_score=0.787343078,
|
||||
pooling_type="CLS",
|
||||
seq_pooling_type="CLS",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -65,7 +65,7 @@ MODELS = [
|
||||
"BAAI/bge-code-v1",
|
||||
architecture="Qwen2Model",
|
||||
mteb_score=0.75724465,
|
||||
pooling_type="LAST",
|
||||
seq_pooling_type="LAST",
|
||||
attn_type="decoder",
|
||||
is_prefix_caching_supported=True,
|
||||
is_chunked_prefill_supported=True,
|
||||
@@ -79,7 +79,7 @@ RERANK_MODELS = [
|
||||
"BAAI/bge-reranker-base",
|
||||
architecture="XLMRobertaForSequenceClassification",
|
||||
mteb_score=0.32398,
|
||||
pooling_type="CLS",
|
||||
seq_pooling_type="CLS",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
|
||||
@@ -26,7 +26,7 @@ RERANK_MODELS = [
|
||||
"method": "no_post_processing",
|
||||
},
|
||||
mteb_score=0.33757,
|
||||
pooling_type="LAST",
|
||||
seq_pooling_type="LAST",
|
||||
attn_type="decoder",
|
||||
is_prefix_caching_supported=True,
|
||||
is_chunked_prefill_supported=True,
|
||||
|
||||
@@ -12,7 +12,7 @@ RERANK_MODELS = [
|
||||
RerankModelInfo(
|
||||
"cross-encoder/ms-marco-TinyBERT-L-2-v2",
|
||||
architecture="BertForSequenceClassification",
|
||||
pooling_type="CLS",
|
||||
seq_pooling_type="CLS",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -21,7 +21,7 @@ RERANK_MODELS = [
|
||||
RerankModelInfo(
|
||||
"tomaarsen/Qwen3-Reranker-0.6B-seq-cls",
|
||||
architecture="Qwen3ForSequenceClassification",
|
||||
pooling_type="LAST",
|
||||
seq_pooling_type="LAST",
|
||||
attn_type="decoder",
|
||||
is_prefix_caching_supported=True,
|
||||
is_chunked_prefill_supported=True,
|
||||
|
||||
@@ -18,7 +18,7 @@ MODELS = [
|
||||
"thenlper/gte-large",
|
||||
mteb_score=0.76807651,
|
||||
architecture="BertModel",
|
||||
pooling_type="MEAN",
|
||||
seq_pooling_type="MEAN",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -44,7 +44,7 @@ MODELS = [
|
||||
architecture="GteNewModel",
|
||||
mteb_score=0.775074696,
|
||||
hf_overrides={"architectures": ["GteNewModel"]},
|
||||
pooling_type="CLS",
|
||||
seq_pooling_type="CLS",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -67,7 +67,7 @@ MODELS = [
|
||||
"Alibaba-NLP/gte-Qwen2-1.5B-instruct",
|
||||
mteb_score=0.758473459018872,
|
||||
architecture="Qwen2ForCausalLM",
|
||||
pooling_type="LAST",
|
||||
seq_pooling_type="LAST",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -78,7 +78,7 @@ MODELS = [
|
||||
"Alibaba-NLP/gte-modernbert-base",
|
||||
mteb_score=0.748193353,
|
||||
architecture="ModernBertModel",
|
||||
pooling_type="CLS",
|
||||
seq_pooling_type="CLS",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -89,7 +89,7 @@ MODELS = [
|
||||
"Qwen/Qwen3-Embedding-0.6B",
|
||||
mteb_score=0.771163695,
|
||||
architecture="Qwen3ForCausalLM",
|
||||
pooling_type="LAST",
|
||||
seq_pooling_type="LAST",
|
||||
attn_type="decoder",
|
||||
is_prefix_caching_supported=True,
|
||||
is_chunked_prefill_supported=True,
|
||||
@@ -108,7 +108,7 @@ RERANK_MODELS = [
|
||||
"Alibaba-NLP/gte-reranker-modernbert-base",
|
||||
mteb_score=0.33386,
|
||||
architecture="ModernBertForSequenceClassification",
|
||||
pooling_type="CLS",
|
||||
seq_pooling_type="CLS",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -119,7 +119,7 @@ RERANK_MODELS = [
|
||||
mteb_score=0.33062,
|
||||
architecture="GteNewForSequenceClassification",
|
||||
hf_overrides={"architectures": ["GteNewForSequenceClassification"]},
|
||||
pooling_type="CLS",
|
||||
seq_pooling_type="CLS",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
|
||||
@@ -13,7 +13,7 @@ MODELS = [
|
||||
"intfloat/e5-small",
|
||||
architecture="BertModel",
|
||||
mteb_score=0.742285423,
|
||||
pooling_type="MEAN",
|
||||
seq_pooling_type="MEAN",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -29,7 +29,7 @@ MODELS = [
|
||||
"intfloat/multilingual-e5-base",
|
||||
architecture="XLMRobertaModel",
|
||||
mteb_score=0.779325955,
|
||||
pooling_type="MEAN",
|
||||
seq_pooling_type="MEAN",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
|
||||
@@ -24,7 +24,7 @@ EMBEDDING_MODELS = [
|
||||
mteb_score=0.824413164,
|
||||
architecture="XLMRobertaModel",
|
||||
is_matryoshka=True,
|
||||
pooling_type="MEAN",
|
||||
seq_pooling_type="MEAN",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -36,7 +36,7 @@ RERANK_MODELS = [
|
||||
"jinaai/jina-reranker-v2-base-multilingual",
|
||||
mteb_score=0.33643,
|
||||
architecture="XLMRobertaForSequenceClassification",
|
||||
pooling_type="CLS",
|
||||
seq_pooling_type="CLS",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
|
||||
@@ -24,7 +24,7 @@ RERANK_MODELS = [
|
||||
"mixedbread-ai/mxbai-rerank-base-v2",
|
||||
architecture="Qwen2ForSequenceClassification",
|
||||
hf_overrides=mxbai_rerank_hf_overrides,
|
||||
pooling_type="LAST",
|
||||
seq_pooling_type="LAST",
|
||||
attn_type="decoder",
|
||||
is_prefix_caching_supported=True,
|
||||
is_chunked_prefill_supported=True,
|
||||
|
||||
@@ -19,7 +19,7 @@ EMBEDDING_MODELS = [
|
||||
"nvidia/llama-nemotron-embed-1b-v2",
|
||||
architecture="LlamaBidirectionalModel",
|
||||
mteb_score=0.689164662128673,
|
||||
pooling_type="MEAN",
|
||||
seq_pooling_type="MEAN",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -32,7 +32,7 @@ RERANK_MODELS = [
|
||||
architecture="LlamaBidirectionalForSequenceClassification",
|
||||
chat_template_name="nemotron-rerank.jinja",
|
||||
mteb_score=0.33994,
|
||||
pooling_type="MEAN",
|
||||
seq_pooling_type="MEAN",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
|
||||
@@ -14,7 +14,7 @@ MODELS = [
|
||||
architecture="NomicBertModel",
|
||||
mteb_score=0.737568559,
|
||||
enable_test=True,
|
||||
pooling_type="MEAN",
|
||||
seq_pooling_type="MEAN",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -32,7 +32,7 @@ MODELS = [
|
||||
architecture="NomicBertModel",
|
||||
mteb_score=0.715488912,
|
||||
enable_test=True,
|
||||
pooling_type="MEAN",
|
||||
seq_pooling_type="MEAN",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
|
||||
@@ -27,7 +27,7 @@ RERANK_MODELS = [
|
||||
architecture="Qwen3ForSequenceClassification",
|
||||
hf_overrides=qwen3_reranker_hf_overrides,
|
||||
chat_template_name="qwen3_reranker.jinja",
|
||||
pooling_type="LAST",
|
||||
seq_pooling_type="LAST",
|
||||
attn_type="decoder",
|
||||
is_prefix_caching_supported=True,
|
||||
is_chunked_prefill_supported=True,
|
||||
|
||||
@@ -14,7 +14,7 @@ MODELS = [
|
||||
is_matryoshka=False,
|
||||
architecture="BertModel",
|
||||
mteb_score=0.714927797,
|
||||
pooling_type="CLS",
|
||||
seq_pooling_type="CLS",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -37,7 +37,7 @@ MODELS = [
|
||||
is_matryoshka=False,
|
||||
architecture="NomicBertModel",
|
||||
mteb_score=0.681146831,
|
||||
pooling_type="CLS",
|
||||
seq_pooling_type="CLS",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -54,7 +54,7 @@ MODELS = [
|
||||
is_matryoshka=True,
|
||||
architecture="BertModel",
|
||||
mteb_score=0.649088363,
|
||||
pooling_type="CLS",
|
||||
seq_pooling_type="CLS",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -65,7 +65,7 @@ MODELS = [
|
||||
is_matryoshka=True,
|
||||
architecture="XLMRobertaModel",
|
||||
mteb_score=0.712258299,
|
||||
pooling_type="CLS",
|
||||
seq_pooling_type="CLS",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -76,7 +76,7 @@ MODELS = [
|
||||
is_matryoshka=True,
|
||||
architecture="GteModel",
|
||||
mteb_score=0.706622444,
|
||||
pooling_type="CLS",
|
||||
seq_pooling_type="CLS",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
|
||||
@@ -14,7 +14,7 @@ ST_PROJECTOR_MODELS = [
|
||||
"TencentBAC/Conan-embedding-v1",
|
||||
architecture="BertModel",
|
||||
mteb_score=0.688611955,
|
||||
pooling_type="MEAN",
|
||||
seq_pooling_type="MEAN",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
@@ -24,7 +24,7 @@ ST_PROJECTOR_MODELS = [
|
||||
"google/embeddinggemma-300m",
|
||||
architecture="Gemma3TextModel",
|
||||
mteb_score=0.7473819294684156,
|
||||
pooling_type="MEAN",
|
||||
seq_pooling_type="MEAN",
|
||||
attn_type="encoder_only",
|
||||
is_prefix_caching_supported=False,
|
||||
is_chunked_prefill_supported=False,
|
||||
|
||||
Reference in New Issue
Block a user