[Model][2/N] Improve all pooling task | Support multi-vector retrieval (#25370)

Signed-off-by: wang.yuqi <noooop@126.com>
2025-10-15 19:14:41 +08:00
parent d4d1a6024f
commit f54f85129e
41 changed files with 786 additions and 399 deletions
--- a/tests/entrypoints/pooling/llm/test_classify.py
+++ b/tests/entrypoints/pooling/llm/test_classify.py
@@ -63,7 +63,7 @@ def test_encode_api(llm: LLM):
    # chunked prefill does not support all pooling
    err_msg = "pooling_task must be one of.+"
    with pytest.raises(ValueError, match=err_msg):
-        llm.encode(prompts, use_tqdm=False)
+        llm.encode(prompts, pooling_task="token_classify", use_tqdm=False)


 def test_score_api(llm: LLM):