[Deprecate] Deprecate pooling multi task support. (#37956)

Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
Signed-off-by: wang.yuqi <noooop@126.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Author: wang.yuqi
Date: 2026-03-24 22:07:47 +08:00
Committed by: GitHub
Parent: 352b90c4a4
Commit: 1b6cb920e6
18 changed files with 566 additions and 66 deletions


@@ -102,7 +102,7 @@ async def test_bge_m3_sparse_plugin_online(
     """Test BGE-M3 sparse plugin in online mode via API."""
     request_payload = {
         "model": model_config["model_name"],
-        "task": "token_classify",
+        "task": "plugin",
         "data": {"input": model_config["test_input"], "return_tokens": return_tokens},
     }
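
In online mode, this change means the request payload's `task` field must be `"plugin"` rather than the deprecated `"token_classify"`. A minimal sketch of the updated request, assuming a locally running vLLM server with the plugin loaded; the `/pooling` route, port, and model name are assumptions for illustration, not taken from this commit:

```python
# Sketch of the updated online request. The endpoint URL, port, and
# model name below are assumptions; only the "task": "plugin" value
# comes from this commit.
import requests

payload = {
    "model": "BAAI/bge-m3",
    "task": "plugin",  # previously "token_classify", now deprecated
    "data": {"input": "What is BGE-M3?", "return_tokens": True},
}
resp = requests.post("http://localhost:8000/pooling", json=payload, timeout=30)
resp.raise_for_status()
print(resp.json())
```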
@@ -166,7 +166,7 @@ def test_bge_m3_sparse_plugin_offline(vllm_runner, return_tokens: bool):
         default_torch_num_threads=1,
     ) as llm_runner:
         llm = llm_runner.get_llm()
-        pooler_output = llm.encode(prompt, pooling_task="token_classify")
+        pooler_output = llm.encode(prompt, pooling_task="plugin")
         outputs = pooler_output[0]
@@ -213,7 +213,7 @@ def test_bge_m3_sparse_plugin_offline_multiple_inputs(vllm_runner):
         default_torch_num_threads=1,
     ) as llm_runner:
         llm = llm_runner.get_llm()
-        pooler_output = llm.encode(prompts, pooling_task="token_classify")
+        pooler_output = llm.encode(prompts, pooling_task="plugin")
         outputs = pooler_output[0]
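
For the offline path (covered by the two hunks above), the same rename applies to the `pooling_task` argument of `LLM.encode`. A standalone sketch of the updated call, assuming vLLM with the BGE-M3 sparse pooling plugin installed; the model name, prompts, and `runner` argument are illustrative assumptions:

```python
# Standalone sketch of the offline call after this change. Assumes the
# BGE-M3 sparse pooling plugin is installed and registered with vLLM;
# the model name, prompts, and runner argument are assumptions.
from vllm import LLM

llm = LLM(model="BAAI/bge-m3", runner="pooling")
prompts = ["What is BGE-M3?", "Definition of BM25"]
# "token_classify" is the deprecated task name; "plugin" replaces it.
pooler_output = llm.encode(prompts, pooling_task="plugin")
outputs = pooler_output[0]
print(outputs)
```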