[Refactor] Separate sequence and token pooling types (#32026)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-01-10 12:53:24 +08:00
committed by GitHub
parent 52d428295d
commit 583a90e005
42 changed files with 324 additions and 204 deletions

View File

@@ -17,7 +17,7 @@ MODELS = [
"BAAI/bge-base-en",
architecture="BertModel",
mteb_score=0.779336792,
pooling_type="CLS",
seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -54,7 +54,7 @@ MODELS = [
"BAAI/bge-m3",
architecture="XLMRobertaModel",
mteb_score=0.787343078,
pooling_type="CLS",
seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,
@@ -65,7 +65,7 @@ MODELS = [
"BAAI/bge-code-v1",
architecture="Qwen2Model",
mteb_score=0.75724465,
pooling_type="LAST",
seq_pooling_type="LAST",
attn_type="decoder",
is_prefix_caching_supported=True,
is_chunked_prefill_supported=True,
@@ -79,7 +79,7 @@ RERANK_MODELS = [
"BAAI/bge-reranker-base",
architecture="XLMRobertaForSequenceClassification",
mteb_score=0.32398,
pooling_type="CLS",
seq_pooling_type="CLS",
attn_type="encoder_only",
is_prefix_caching_supported=False,
is_chunked_prefill_supported=False,