[Refactor] Separate sequence and token pooling types (#32026)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-01-10 12:53:24 +08:00
parent 52d428295d
commit 583a90e005
42 changed files with 324 additions and 204 deletions
--- a/tests/models/language/pooling_mteb_test/test_nemotron.py
+++ b/tests/models/language/pooling_mteb_test/test_nemotron.py
@@ -19,7 +19,7 @@ EMBEDDING_MODELS = [
        "nvidia/llama-nemotron-embed-1b-v2",
        architecture="LlamaBidirectionalModel",
        mteb_score=0.689164662128673,
-        pooling_type="MEAN",
+        seq_pooling_type="MEAN",
        attn_type="encoder_only",
        is_prefix_caching_supported=False,
        is_chunked_prefill_supported=False,
@@ -32,7 +32,7 @@ RERANK_MODELS = [
        architecture="LlamaBidirectionalForSequenceClassification",
        chat_template_name="nemotron-rerank.jinja",
        mteb_score=0.33994,
-        pooling_type="MEAN",
+        seq_pooling_type="MEAN",
        attn_type="encoder_only",
        is_prefix_caching_supported=False,
        is_chunked_prefill_supported=False,